
    3j                    D&   % S SK r S SKrS SKJr  S SKJrJrJrJr  S SK	r	S SK
r	S SKJr  S SKJrJrJrJr  \(       a  S SKJr  S SKJr  \ R,                  R/                  SS	5      S	:H  rS
rS\S-  4S jrS\S-  4S jrS\S-  4S jrS\S-  4S jrS\S-  4S jrS\S-  4S jr S\4S jr!S\4S jr"\ R,                  R/                  SS	5      S	:H  r#S
r$Sr%S
r&\" 5       (       a  SOSr'\(" \ R,                  R/                  SS5      5      r)\(\*S'   \" SSSSS9r+\\*S'   S
r,\\*S'   \" 5       r-\S-  \*S '   \" 5       r.\S-  \*S!'   \" S"S#SS$9r/\\*S%'   Sr0\\*S&'   \" 5       r1\S-  \*S''   \" 5       r2\S-  \*S('   \" S)S*9r3\\*S+'   S
r4\\*S,'   0 r5\6\7\74   \*S-'   Sr8\(S-  \*S.'   S/r9\S0   \*S1'   \ R,                  R/                  S2S35      S	:H  r:\\*S4'   \ R,                  R/                  S5S35      S	:H  r;\\*S6'   \ R,                  R/                  S7S35      S	:H  r<\\*S8'   \ R,                  R/                  S9\" 5       (       a  S3OS	5      S	:H  r=\\*S:'   \ R,                  R/                  S;S	5      S	:H  r>\ R,                  R/                  S<S	5      S	:H  r?S
r@SrA\ R,                  R/                  S=S	5      S	:H  rB\ R,                  R/                  S>5      S	:H  rC\ R,                  R/                  S?5      S	:H  rD\ R,                  R/                  S@S	5      S	:H  rE\ R,                  R/                  SA\" 5       (       a  S3OS	5      S	:H  rFSrGSrHSrI\ R,                  R/                  SBS35      S	:H  rJ\ R,                  R/                  SC5      S	:H  rK\ R,                  R/                  SDSE5      rL\SF   \*SG'   SrMSrN\"" 5       rOS
rPS
rQSrRS
rSSrT\	R                  R                  R                  \*SH'   SrX\	R                  R                  R                  \*SI'   SrY\	R                  R                  R                  \*SJ'   Sr[\	R                  R                  R                  \*SK'   Sr\\	R                  R                  R                  \*SL'   Sr]\	R                  R                  R                  \*SM'   Sr^\\_SN   /\_SN   4   S-  \*SO'   Sr`\\_SN   /\_SN   4   S-  \*SP'   SraS
rbS
rcS
rdSre0 rf\6\7\6\7\4   4   \*SQ'   0 rg\6\7\6\7\4   4   \*SR'   Srh\ R,                  R/                  SSS	5      S	:H  riS
rjSrkSrlS
STSUSSV.rm\6\7\4   \*SW'   SXrn\SY   \*SZ'   S
ro/ rp\_\7\\_SN   /\_SN   4   -     \*S['   Srq\(S-  \*S\'   SrrS
rsS rt\(\*S]'   S^ru\S_   \*S`'   Srv\\(/\(4   S-  \*Sa'   Sbrw\Sc   \*Sd'   S^rx\Se   \*Sf'   Sry\\(/\(4   S-  \*Sg'   Sbrz\Sc   \*Sh'   S^r{\Se   \*Si'   Sr|\\(/\(4   S-  \*Sj'   Sbr}S
r~\\*Sk'   SrSlrSmr\" SSnSoSp9r\\*Sq'   \ R,                  R/                  Sr5      S	:H  r\ R,                  R/                  Ss5      S	:H  r\ R,                  R/                  St5      S	:H  r\ R,                  R/                  Su5      S	:H  r\ R,                  R/                  Sv5      S	:H  r\(" \ GR                  " SwSl5      5      r\(" \ GR                  " SxSy5      5      rS\(S-  4Sz jr\" 5       r\(S-  \*S{'   \ R,                  R/                  S|S	5      S	:H  r\ R,                  R/                  S}S35      S	:H  r\ R,                  R/                  S~S35      S	:H  r\ R,                  R/                  S\" 5       (       d  S	OS35      S	:H  r\\*S'   SrS\*S'   / r\_\7   \*S'   / r\_\7   \*S'   S
r\\*S'   \" SSS
S$9r\\*S'   / r\_\(   \*S'   \ R,                  R/                  SS5      GR/                  5       rS\(S-  4S jr\" 5       r\(S-  \*S'   \ R,                  R/                  SS5      GR/                  5       r\ R,                  R/                  SS5      GR/                  5       r\S   \*S'   \ R,                  R/                  SS5      GR/                  5       r\S   \*S'   S
r\\*S'   S
r\\*S'   Sr\\*S'   Sr\\*S'   Sbr\S   \*S'   Sr\\*S'   Sr\\*S'   \ R,                  R/                  SS35      S	:H  r\\*S'   S
rSrS
r\ R,                  R/                  S5      S	:H  r\ R,                  R/                  S5      S	:H  rSrSrSr\ R,                  R/                  S5      S	:H  r\(" \ R,                  R/                  SS5      5      r\" \ R,                  R/                  SS5      5      r\ R,                  R/                  S5      S	:H  r\ R,                  R/                  S5      S	:H  r\(" \ R,                  R/                  SS	5      5      rS rS r " S S5      r " S S5      r\ R,                  R/                  SS35      S	:H  rS\7S\4S jrS\7S\4S jrS\7S\4S jr\ R,                  R/                  SS5      r\	GR|                  GR~                  (       d  S	OS3r\ R,                  R/                  S\5      S	:H  r\ R,                  R/                  SS35      S	:H  r\ R,                  R/                  SS35      S	:H  r\ R,                  R/                  SS	5      S	:H  r\ R,                  R/                  S5      S	:H  rSrSrSrSr\(S-  \*S'   \" SSS9r\\*S'   S
rS
rS
rSr\ R,                  R/                  S5      S	:H  rSr\/ S4   S-  \*S'   S
r\ R,                  R/                  S5      S	:H  r\\*S'   \ R,                  R/                  S5      S	:H  r\\*S'   \ R,                  R/                  SS5      r\ R,                  R/                  S\" 5       (       a  S3OS	5      S	:H  r\\*S'   Sr\\*S'   Sr\ R,                  R/                  SS	5      S	:H  rSUrSrSrSrSr\(S-  \*S'   SrS
rSrS
rS
r\ GR                  " SS	5      S	:H  r\ GR                  " S5      S	:H  r\(" \ R,                  R/                  SS 5      5      r\ R,                  R/                  SS35      S	:H  rSrS
rSrS
rS
rS
rS
rSUrSUrSrSrS
rS
rSrS
rS\	GR                  ;   =(       d    S\	GR                  ;   r\" 5       =(       d    \r\ R,                  R/                  SS	5      S	:H  rSr\7S-  \*S'   Sr\7S-  \*S'   \" SSS9r\7\*S'   S\74S jr\" 5       r\7\*S'   Sr\(\*S'   \" SSSS$9Gr \\*S'   \" SS
S9Gr\\*S'   S
GrSlGrSS/Gr\_\S   \7-     \*S'   S
Gr\\*S'   \ R,                  R/                  SS35      S	:H  GrSGr\(\*S'   SGr\(\*S'   SGr	\\*S'    " S S5      Gr
 " S S5      GrS\4S jGrS\(4S jGr\" 5       (       a  SOG\" 5       Gr\(S-  \*S'   \" SSSS$9Gr\\*GS '   \" GSGS9Gr\(\*GS'   \!" 5       Gr\\*GS'   \" GSS*9Gr\\*GS'   \" GSGSS
S$9Gr\\*GS	'   \ R,                  R/                  GS
S35      S	:H  Gr\\*GS'   \" GSS*9Gr\\*GS'   \7S-  \*GS'   \" 5       (       a{   S GSGKGJGr  G\(       aQ  G\GR2                  " \ GR4                  GR7                  G\GR9                  GS\ GR:                  5      GS5      5      GrOG\GR2                  " GS5      GrOSGrSGr!\ R,                  R/                  GSS	5      S	:H  Gr"\ R,                  R/                  GSS	5      S	:H  Gr#S
Gr$S
Gr%SGr&S
Gr'SGr(GSGr)S
Gr*SGr+S
Gr,\\*GS'   \ R,                  R/                  GSS35      S	:H  Gr-S
Gr.S
Gr/S
Gr0S
Gr1\" 5       (       + Gr2\\*GS'   \ R,                  R/                  GSS5      Gr3G\3S:g  Gr4G\3S	:X  a  SOG\3Gr5\ R,                  R/                  GSS5      Gr6\7S-  \*GS'   \ R,                  R/                  GS5      S	:H  Gr7S
Gr8\ R,                  R/                  GSS35      S	:H  Gr9\\*GS'   S
Gr:\\*GS'   S
Gr;\\*GS'   S
Gr<\\*GS '   S
Gr=\\*GS!'   S
Gr>\\*GS"'   S
Gr?\\*GS#'   S
Gr@\\*GS$'   S
GrA\\*GS%'   \ R,                  R/                  GS&S35      S	:H  GrB\ R,                  R/                  GS'S35      S	:H  GrC\\*GS('   SGrD\\*GS)'   0 GrE\6\7\6\7\4   4   \*GS*'   \(" \ R,                  R/                  GS+GS,5      5      GrF\(\*GS-'   S
GrG\\*GS.'   S\7S-  4GS/ jGrH\" GS0SS9GrI\7\*GS1'    " GS2 GS35      GrJ " GS4 GS55      GrK " GS6 GS75      GrL " GS8 GS95      GrM " GS: GS;5      GrN " GS< GS=5      GrO\" G\O5       " GS> GS?G\O5      5       GrP\" G\O5       " GS@ GSAG\O5      5       GrQ " GSB GSC5      GrRGS5GrS\GSD   \*GSE'   GS7GrT\GSF   \*GSG'   GSHGrU\GSH   \*GSI'   GS7GrV\GS7   \*GSJ'    " GSK GSL5      GrW " GSM GSN5      GrX " GSO GSP5      GrY/ GSQQGrZ\_\7   \*GSR'   / GSSQGr[\_\7   \*GST'   S/Gr\\_\7   \*GSU'   / Gr]\_\\	GR                  \	GR                  \	GR                  /S4      \*GSV'   \ GR                  " GSWS	5      S	:H  Gr_ " GSX GSY5      Gr` " GSZ GS[5      Gra\(       a  S GS\GKb7   " GS] GS^5      Grc\ R,                  R/                  GS_S35      S	:H  Grd\\*GS`'   \" \GR                  G\f   5        g! G\G\ 4 a    SGr GN2f = f(a      N)Callable)AnycastLiteralTYPE_CHECKING)	is_fbcode)Configget_tristate_envinherit_fields_frominstall_config_module)InductorChoices)CUDAGraphPolicyTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      [        S5      $ )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
        P/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      [         R                  R                  S5      S:X  a  g[         R                  R                  S5      S:X  a  gg )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr       s7    	zz~~01S8	zz~~01S8r   c                      [        S5      $ )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr#   %   r   r   c                      [        S5      $ )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr&   )   s    IJJr   c                  D    [        S[        5       (       d  S5      $ S 5      $ )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr)   -   s(    9KK %) r   c                      [        S5      $ )N&TORCHINDUCTOR_AUTOTUNE_AT_COMPILE_TIMEr   r   r   r    autotune_at_compile_time_defaultr,   4   s    DEEr   c                      Sn S[         R                  ;   a"  [         R                  R                  S5      S:H  $ [        5       (       a#  [        R
                  R                  S5      nX:*  $ g)N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr6   8   s]    #$ /2::=zz~~FG3NN	''<<;
 66 r   c                      Sn S[         R                  ;   a"  [         R                  R                  S5      S:H  $ [        5       (       a%  Sn[        R
                  R                  U5      nX :*  $ g)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr0   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer5   s      r   prologue_fusion_enabledr;   G   s[    %&"&"**4zz~~=>#EE	<''<<WE88r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_rank,TORCHINDUCTOR_PRECOMPILATION_TIMEOUT_SECONDSi,  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)rB   rD   rE   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rT   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrapper*TORCHINDUCTOR_CPP_CACHE_PRECOMPILE_HEADERScpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAX$TORCHINDUCTOR_APPLY_GUMBEL_MAX_TRICKTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noneri   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)rE   tritonatenr   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyrj   )rj   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinatorrE   )rE   
custom_opscustom_ops_multidtypebucket_all_gathers_bucket_moderj   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinator"bucket_reduce_scatters_bucket_modebucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinator!runtime_estimations_mms_benchmark   g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)rE   rD   rB   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMM&TORCHINDUCTOR_PIPELINE_GEMM_AUTOTUNINGTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM%TORCHINDUCTOR_DEFAULT_AUTOTUNE_WARMUP"TORCHINDUCTOR_DEFAULT_AUTOTUNE_REPd   c                      [         R                  R                  S5      n U c  gU R                  5       S;   a  g [	        U 5      $ )N,TORCHINDUCTOR_AUTOTUNE_NUM_CHOICES_DISPLAYED
   r   r   r   r   lowerintenv_vals    r   '_autotune_num_choices_displayed_defaultr     s8    jjnnKLG}}/)w<r   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitionzCUDAGraphPolicy | Nonecudagraph_policycustom_should_partition_opscudagraph_unsafe_unbacked_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPc                      [         R                  R                  SS5      n U R                  5       S;   a  g [	        U 5      $ )N*TORCHINDUCTOR_NVGEMM_MAX_PROFILING_CONFIGS5r   r   r   s    r   %_nvgemm_max_profiling_configs_defaultr   n  s2    jjnnI3OG}}/)w<r   nvgemm_max_profiling_configs(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passes)earlylaterE   pre_grad_pass_timinguse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSc                  d    [         R                  R                  SS5      R                  S5      n U $ )N#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT ,r   r   r   split)collect_envs    r    _parse_autoheuristic_collect_envr     s)    **..!FKQQRUVKr   c                  d    [         R                  R                  SS5      R                  S5      n U $ )NTORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmr   r   )use_envs    r   _parse_autoheuristic_use_envr     s(    jjnn>
KQQRUVGNr   c                   @    \ rS rSrSrS\" 5       ;   rS\" 5       ;   rSrg)autoheuristic_collecti  zL
Config for which autoheuristic optimizations should collect training data.
pad_mmr   r   N)	__name__
__module____qualname____firstlineno____doc__r   r   r   __static_attributes__r   r   r   r   r     s%     9;;F=??Hr   r   c                   @    \ rS rSrSrS\" 5       ;   rS\" 5       ;   rSr	g)autoheuristic_usei  zM
Config for which autoheuristic optimizations should use learned heuristics.
r   r   r   N)
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s%     577F=??Hr   r   'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 <    [        U 5      =(       d    [        U 5      $ N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &A*;D*AAr   c                 t    U S:X  a  [         R                  $ U S:X  a  [         R                  $ U [        5       ;   $ Nr   r   )r   r   r   r   r   s    r   r   r     s;    x$+++		$--- 7999r   c                 t    U S:X  a  [         R                  $ U S:X  a  [         R                  $ U [        5       ;   $ r   )r   r   r   r   r   s    r   r   r     s;    x '''		 ))) 3555r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT#TORCHINDUCTOR_CACHE_SDPA_CONSTRAINT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold)TORCHINDUCTOR_DELAY_REALIZE_CHEAP_OUTPUTSrD   rE   delay_realize_cheap_outputs.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLESr   (TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusionr   'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   g?max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   devgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesTORCHINDUCTOR_DISABLED_PASSESdisabled_passesc                  z    S[         R                  ;   a  [         R                  S   n OSn U S;   d
   SU  35       eU $ )NTORCHINDUCTOR_WORKER_START
subprocess)r"  forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr&    sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSElog_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tp)TORCHINDUCTOR_PARTITIONED_SCATTER_ENABLEDr.   "partitioned_scatter_min_partitions   "partitioned_scatter_max_partitionsg?!partitioned_scatter_memory_budgetc                   2    \ rS rSr% Sr\\S'   Sr\\S'   Sr	g)_collectiveif  Fauto_selecti   #one_shot_all_reduce_threshold_bytesr   N)
r   r   r   r   r8  bool__annotations__r9  r   r   r   r   r   r7  r7  f  s    K/9'9r   r7  c                      \ rS rSr% SrSr\\S'   Sr\S-  \S'   Sr	\S-  \S'   Sr
\S-  \S'   Sr\S-  \S	'   Sr\\R"                  R$                  /\S-  4   S-  \S
'   Sr\S   \S'   Sr\S   \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\\S'   Sr\S-  \S'   Sr\\S'   Sr\S-  \S'   Sr\S   S-  \S'   Sr\\S'   Sr \\S'   Sr!\S   \S'   Sr"\\S '   S!r#g)"aten_distributed_optimizationsik  zDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)rD  	benchmarkcollective_estimatorrE  compute_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance!log_final_collectives_estimationsbucket_exposed_firstbucket_only_internode_commsenable_fusion_regions)rE   r   r   	coalescedbucket_modeT&prioritize_bucketing_during_scheduling
spmd_checkwarn)rT  errorspmd_mismatch!overlap_scheduling_autofix_cyclesr   )$r   r   r   r   r   r>  r:  r;  r?  r@  rA  r   rB  floatrC  r   r1   fxNoderF  r   rG  rH  rI  rJ  rK  rL  rM  rN  rO  rQ  rR  rS  rV  rW  r   r   r   r   r=  r=  k  sz   N ',t+ )-$+, (,+ )-3:,.2ut|2 QUx(EFMT @L'";<K =Hw89G ,0EDL/.2ut|2 &*edl) %)sTz(.3%t3 )-$+, ).-
 *.4$;- 	 MNQUU
 48*D7
 J
 /5M7?+4
 /4%t3r   r=  c                  P    Sn Sn[         R                  R                  U5      nX:  $ )a  
TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
knob to enable / disable. The justknob should not be performed at import, however.
So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
async_compile.py.
ry   z0pytorch/inductor:enable_parallel_compile_version)r1   r2   r3   )ENABLE_PARALLEL_COMPILE_VERSIONr:   r5   s      r   #parallel_compile_enabled_internallyr]    s.     '(#@G##88AG*55r   c                     SSK n U R                  [        5      nS[        R                  ;   a0  [        [        R                  S   5      nUR                  SU5        U$ [        R                  S:X  a  SnUR                  S5        U$ [        5       (       a$  [        5       (       d  SnUR                  S5        U$ [        R                  R                  5       nU(       d   e[        S	U5      nUR                  S
U5        U$ )a	  
Here are the precedence to decide compile_threads
1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
   setting this to 1 to make pdb happy.
2. Set to 1 if it's win32 platform
3. decide by the number of CPU cores
r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32ry   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcode    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r]  r1   _utils	cpu_countmin)rb  logcompile_threadsrh  s       r   decide_compile_threadsrl    s      

H
%C&"**4bjj)HIJ4oF  
	 56  
@BB56  LL**,	yb),,o>r   rk  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )rE   quiesce_async_compile_timeuse_static_cuda_launcherz/torch._inductor.config.use_static_cuda_launcheruse_static_triton_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherz2torch._inductor.config.strict_static_cuda_launcherstrict_static_triton_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   force_shape_padTORCHINDUCTOR_PERMUTE_FUSION_use_fp64_for_unbacked_floatsTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotc                      S n [        5       (       aH  [        R                  R                  SS 5      n[        R                  R                  SS5      nUb  SU 3n U $ )NMAST_HPC_JOB_NAME	ROLE_RANKr   r=   )r   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr    sN    G{{

':DAjjnn[#6$Ek]SGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                      \ rS rSr% \R
                  R                  S5      S:H  r\\	S'   Sr
\\	S'   Sr\\	S'   \R
                  R                  S	5      b!  \" \R
                  R                  S	5      5      OS
r\S
-  \	S'   Srg
)auto_chunkeri  TORCHINDUCTOR_AUTO_CHUNKERr   enablei   output_size_thresholdr  amplify_ratio_threshold TORCHINDUCTOR_CHUNKER_NUM_CHUNKSN	num_chunkr   )r   r   r   r   r   r   r   r  r:  r;  r  r   r  r  r   r   r   r   r  r    sz    ::>>">?3FFDF "-3, $%S$ ::>><=I 	BJJNN=>? sTz r   r  c                      \ rS rSr% SrSr\R                  R                  SS5      S:H  r	\R                  R                  SS5      S:H  r
Sr\S-  \S	'   \" \R                  R                  S
S5      5      rS\R                  R                  S\R                   S:X  a  SOS5      4r\S\4   \S'   \R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  rSr\S-  \S'   Sr\S-  \S'   \" S5      r\S-  \S'   Sr\S   \S'   \" \R                  R                  SS5      5      r\R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  r\R                  R                  SS5      r \R                  R                  S S5      S:H  r!S!r"\" \R                  R                  S"S5      5      r#\R                  R                  S#S5      r$\R                  R                  S$S5      r%S%r&S!r'\R                  R                  S&S5      S:H  r(S!r)\R                  R                  S'S5      S:H  r*\R                  R                  S(S5      S:H  r+S)r,S*r-g)+cppi  zi
Settings for cpp backend.
This class provides a centralized location for managing cpp backend settings.
$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr1   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAYr}  r   ).r   r   r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr  r   r;  min_chunk_sizere  rf  r  tuplestrenable_kernel_profileweight_prepackr  r  r
   r  r:  r  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayuse_two_step_variance_thresholdr   r   r   r   r  r    s    G
 	

=sCsJ  jjnn%H#NRUUOGS4Z(JERSN 	


u3<<8+CiOCtSy	  	

@#F#M 
 ZZ^^$FLPSSN
 04 #*304!3:4 //IJJtJ 	 wHI 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !
 	

=sCsJ 
  % 	

>DK  	

FLPSS  
 '+#r   r  c                      \ rS rSr% Sr\R                  R                  S5      S:H  rSr	Sr
Sr\\\\S4   -     S-  \S	'   S
rSrSrSr\" 5       (       + rSrSr\S-  \S'   SrSr\" SSS9r\\S'   Sr\\S'   SrSrSrSr Sr!\R                  R                  S\" 5       (       d  SOS5      S:H  r"\\S'   Sr#\S-  \S'   Sr$\\S'   Sr%Sr&\'" 5       r(\S-  \S'   Sr)\\S'   Sr*\\S'   \RV                  " SS5      S:H  r,\\S'   Sr-Sr.\R                  R                  SS5      S:H  r/\R                  R                  SS5      S:H  r0Sr1\2S   \S '   \R                  R                  S!S5      S:H  r3\R                  R                  S"S5      S:H  r4\\S#'   \R                  R                  S$S5      S:H  r5Sr6\" \R                  R                  S%S5      5      r7\2S&   \S''   \R                  R                  S(S5      S:H  r8S)r9Sr:\;Rx                  Rz                  (       a  S*OS+r>\\S,'   Sr?Sr@SrASrB\CS-  \S-'   SrD\R                  R                  S.S5      S:H  rE\R                  R                  S/S5      S:H  rF\R                  R                  S0S5      S:H  rGSrH\" \R                  R                  S1\;Rx                  Rz                  (       a  SOS25      5      rI\" \R                  R                  S3S45      5      rJ\R                  R                  S5S5      S:H  rK\R                  R                  S6\" 5       (       a  SOS5      S:H  rLS7rMSrN\S-  \S8'   \R                  R                  S9S5      S:H  rOSrP\R                  R                  S:5      S:H  rQ0 rR\S\C\4   \S;'   S<rT\R                  R                  S=S5      S:H  rU\\S>'   \R                  R                  S?5      rV\CS-  \S@'   \R                  R                  SAS5      S:H  rW\\SB'   \R                  R                  SCS5      S:H  rX\\SD'   \R                  R                  SES5      S:H  rY\\SF'   SGrZg)Hr   iy  z&
Config specific to codegen/triton.py
TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   r3  r  "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr  cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsTORCHINDUCTOR_NATIVE_MATMULnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS TORCHINDUCTOR_DECOMPOSE_SORT_OPSdecompose_sort_ops$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   ry   r.      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16   ra     spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STORETORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32TORCHINDUCTOR_ENABLE_PDL!TORCHINDUCTOR_MIX_ORDER_REDUCTIONry   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE4TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGESdebug_dump_kernel_inputsr  %TORCHINDUCTOR_TRITON_PROTON_PROFILINGproton_profiling&TORCHINDUCTOR_TRITON_PROTON_OUTPUT_DIRproton_output_dir'TORCHINDUCTOR_TRITON_PROTON_GROUP_BY_SMproton_group_by_sm-TORCHINDUCTOR_TRITON_PROTON_SPLIT_INVOCATIONSproton_split_invocations-TORCHINDUCTOR_TRITON_PROTON_PER_CTA_OCCUPANCYproton_per_cta_occupancyr   )[r   r   r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r  r   r;  cudagraph_min_partition_sizeslow_path_cudagraph_asserts!cudagraph_trees_history_recordingcudagraph_trees_objgraphr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr	   r  r:  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr,   r  r  r  getenvr   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionsr  cooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr1   r5   hipr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_size#mix_order_reduction_non_strict_mode&mix_order_reduction_allow_multi_stagesr  dictmax_kernel_dump_occurrencesr  r  r  r  r  r   r   r   r   r   r   y  sE   
  :;sBJ O %*! DHU3sCx#89D@G $%  #' ).%  % ,5;$ +.' 67&d
6 ! $  &9   37)46 #( "   N 	

4cRU	
 	 d  !IsTz  #d"   -M,NdTkN
 ).- "OT!$ ))$A3G3NM4N (,$'+$ 	

:C@CG  	

?EL  	 wHI  	

<cBcI  	

93?3F   	

=sCsJ 
 $)  ),


3S9)L'*% 
 jjnn%DcJcQO   K "'!2!22OS: M "
 15-
 04 #*3 " 	

5s;sB !
 !#

/JC PTW WJJNN#:C@CGM 6;2 !


25==;L;LCRV	
  


<dC  :C@CGJ 	

:9;;CTWX	  *+&15"C$J5


NPST	 , +0' 	

MNRUU + 02d38n1
 #$ 	

>DK d  %'JJNN0%sTz 
 	

@#F#M  
 	

FLPSS d  	

FLPSS d r   r   c                      \ rS rSr% SrSr\R                  R                  SS5      S:H  r	\R                  R                  SS5      S:H  r
\R                  R                  SS5      S:H  r\R                  R                  S	S
5      r\R                  R                  SS5      r\S   \S'   \R                  R                  SS5      rSrSrSr\\S'   Sr\\S'   SrSr\\S'   Sr\S-  \S'   Sr\\S'   0 r\\\4   \S'   \R                  R                  SS5      S:H  r\\S'   Sr\\S'   \R                  R                  SS5      S:H  r\\S'   \ " \R                  R                  SS5      5      r!\ \S'   0 r"\\\#4   \S '   Sr$\\S!'   Sr%\\S"'   \R                  R                  S#S5      S:H  r&\\S$'   Sr'\\S%'   Sr(\S-  \S&'   \)" 5       (       + r*\\S''   Sr+\S-  \S('   Sr,\S-  \S)'   Sr-\S-  \S*'   0 r.\\/R`                  Rb                  \2\   4   \S+'   Sr3\2\   S-  \S,'   \R                  R                  S-S5      S:H  r4Sr5\\S.'   Sr6\S-  \S/'   Sr7\\2\   -  S-  \S0'   Sr8\S-  \S1'   S2r9g)3aot_inductori  z1
Settings for Ahead-Of-Time Inductor Compilation
r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS!AOT_INDUCTOR_ENABLE_FRAME_POINTER&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr.   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_pathr   ):r   r   r   r   r   output_pathr   r   r   debug_compiledebug_symbolsenable_frame_pointercompile_wrapper_opt_levelrP  r   r;  filtered_kernel_namesserialized_in_specserialized_out_specrR  r:  rS  use_consts_asm_buildrT  rU  rV  rW  rD  r  rY  rZ  r\  r   r^  r_  r   r`  ra  rc  rd  re  r   rf  rg  rh  ri  rj  r1   _ops
OpOverloadlistrk  
enable_ltorm  rn  ro  rp  r   r   r   r   rG  rG    s    KJJNN#?ELMJJNN#?ELM 	

:C@CG  !#

0$! EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$  GT$(dTk( !OT   "Hd38n! 	

H#NRUU (  "d!  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 	

BCHCO !$  %)T( 48$cDj7 $-;. (,+ +/D4K. 26"C$J5 EG4

 5 5tCy @AF'+NDI$+  93?3FJ M4 )-3:, 15sT#Y-4)-C$J-r   rG  c                   $    \ rS rSr% Sr\\S'   Srg)aot_inductor_modei  Fcompile_standaloner   N)r   r   r   r   r  r:  r;  r   r   r   r   r  r    s      %$r   r  c                   "   \ rS rSr% SrSr\S   \S'   SrSr	\
R                  R                  \
R                  R                  S\
R                  R                  \
R                  R!                  \R$                  5      S5      5      5      rS	r\S	-  \S
'   / SQr\\   \S'   \" \\\\4   \" S \
R                  R                  SS5      R5                  S5       5       5      5      r\\\\4   \S'   \" \\\\4   \" S \
R                  R                  SSR                  S \ 5       5      5      R5                  S5       5       5      5      r\\\\4   \S'   \
R                  R                  SS5      S:H  rSrSr\\S'   \
R                  R                  SS5      S:H  r \!\S'   \
R                  R                  S5      r"\#S	-  \S'   \
R                  R                  S5      r$\#S	-  \S '   \
R                  R                  S!S5      r%\#\S"'   \
R                  R                  S#S5      S:H  r&\!\S$'   \
R                  R                  S%S5      S:H  r'\!\S&'   \
R                  R                  S'S(5      r(\#\S)'   S*r)\!\S+'   Sr*\!\S,'   Sr+\!\S-'   S*r,\!\S.'   S/r-g	)0cutlassi  z%
Config specific to cutlass backend.
-O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Ncutlass_max_profiling_configs)ry   r.   r   r  %cutlass_max_profiling_swizzle_optionsc              #   8   #    U  H  n[        U5      v   M     g 7fr   r   .0xs     r   	<genexpr>cutlass.<genexpr>  s!      
 FF   +TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_SHAPEz2,1,1r   cutlass_dynamic_cluster_shapec              #   8   #    U  H  n[        U5      v   M     g 7fr   r  r  s     r   r  r    s!      
 FFr  .TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_FALLBACKc              #   8   #    U  H  n[        U5      v   M     g 7fr   )r  )r  vs     r   r  r    s     G)FAQ)Fr   cutlass_dynamic_cluster_fallbackCUTLASS_EPILOGUE_FUSIONr   r   ry   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegenr   ).r   r   r   r   r   r  r   r;  enable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer1   __file__cutlass_dirr  r   r  r|  r   r  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r:  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r  r    s    EJw@AI  M ''""


'GGLL/)	
K 15!3:4 8D)49C:>c3m 
ZZ^^=weCj
 	
;!5c3#7  >Bc3m 
ZZ^^@G)FGG eCj
 	
	>$eCcM&: 	 	

0#6#= $
  *+!3*
 	

H#NRUU $  .0ZZ^^).d
  -/JJNN(-sTz  (*zz~~3S(  	

DcJcQ "4  	

;SASH $   "zz~~+U  
 %)T( +0!4/ -2#T1 $(D'r   r  c                   n    \ rS rSr% Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr	Sr
Sr\S-  \S'   S	rg)
cudai	  Narchr5   cuda_cxxF   r   r   )r   r   r   r   r  r  r;  r5   r  enable_cuda_ltoenable_ptxas_infor   r   r   r   r   r   r  r  	  sR    
 D#*
 GS4Z  HcDj O  01 #*0r   r  c                       \ rS rSr% Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   \	R                  R                  \	R                  R                  SS5      5      rSrg)	xpui5	  Nr  r5   oneapi_rootr  r   r   )r   r   r   r   r  r  r;  r5   r  r   r  r  r   r   r  r   r   r   r   r  r  5	  sV     D#* GS4Z #Kt"''""2::>>2Mr#RSKr   r  c                      \ rS rSr% / r\\   \S'   / SQr\\	S      \S'   Sr
\	S   \S'   SrSrS	rS	rSrS
r\S
-  \S'   \R$                  R'                  S5      r\R$                  R'                  SS5      S:H  r\\S'   S
r\S
-  \S'   S
r\S
-  \S'   S
r\S
-  \S'   Sr\\S'   S
r\\   S
-  \S'   Sr\\S'   Sr\\S'   Srg
)rocmiD	  r  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_thresholdr   ) r   r   r   r   r  r|  r  r;  r  r   r  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   ck_dirr  r:  r  r   r  r  r  r  r  r  r   r   r   r   r  r  D	  s%    D$s)FtG$@AB  	 wL 
 H J M O #( !IsTz  ZZ^^23F 	

FLPSS $ 
 +/S4Z. ,0cDj/ 15!3:4 ',t+ &*L$s)d")  s !##"r   r  )r  r   halidepallascpu_backend)r   r  r  cuda_backendr  tpu_backendxpu_backendc                       \ rS rSrSrSrg)mtiai	  Fr   N)r   r   r   r   disable_welford_reductionr   r   r   r   r  r  	  s     %r   r  c                   R    \ rS rSr% SrSrSr\S   \S'   Sr	\S   \S'   S	r
S	rS	rS
rg)r  i	  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFr   N)r   r   r   r   
cpu_target
gpu_targetr  r   r;  r  assertsdebugscan_kernelsr   r   r   r   r  r  	  sQ    J J
 	 GRS  	 7QR 
 G E Lr   r  c            	          \ rS rSr% \R
                  R                  SS5      S:H  r\R
                  R                  SS5      S:H  rSr	\
S-  \S'   SrSrS	rS	rS	rS	rS	r\R
                  R                  S
S5      S:H  r\R
                  R                  SS5      S:H  r\R
                  R                  SS5      r\R
                  R                  SS5      rSrSr\\
/S4   S-  \S'   \R
                  R                  SS5      S:H  r\" \R
                  R                  S\R
                  R                  SS5      5      5      r\\S'   Srg)tracei	  TORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_levelr   )r   r   r   r   r   r   r   enabledsave_real_tensorsr  r  r;  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr   r  r   r   r   r   r  r  	  s\   jjnn2C8C?G 

'FLPSS !IsTz  I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 04J#%,3ZZ^^,BCHCO &)


!2::>>2G#M	
&s r   r  )ztrace.upload_tarrq   rr   rs   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrn   ro   r/  rt   r   _save_config_ignore)r  zcuda.cutlass_dirzcutlass.cutlass_dirzxpu.cutlass_dirr'  rk  ro   rn   rq   rr   rs   r/  rt   r   r  r   rF   rH   rL   rM   _cache_config_ignore_prefix_cache_config_factory_keysexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   R    \ rS rSr% Sr\\\\\\4      4   S-  \	S'   Sr
\\	S'   Srg)lookup_tableiH
  NtableTcheck_src_hashr   )r   r   r   r   r  rD  r  r|  r   r;  r  r:  r   r   r   r   r  r  H
  s4    48E4T$sCx.))*T18  NDr   r  c                   z   \ rS rSr% Sr\\S'   Sr\S-  \S'   Sr\\S'   Sr	\
S-  \S'   SrSrSr\R                  R!                  S5      r\S-  \S	'   \R                  R!                  S
5      r\S-  \S'   SrSr\S   S-  \S'   SrSr\\S'   \R2                  " S5      S:H  r\R2                  " SS5      rSrSrSrg)test_configsiS
  F%force_extern_kernel_in_multi_templateNforce_custom_op_decompositionforce_no_impl_groupingmax_mm_configs(TORCHINDUCTOR_AUTOTUNE_CHOICE_NAME_REGEXautotune_choice_name_regex(TORCHINDUCTOR_AUTOTUNE_CHOICE_DESC_REGEXautotune_choice_desc_regex)assertrj  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   r   )r   r   r   r   r  r:  r;  r  r  r  r   runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr   r   r   r  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r(  force_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r   r  r  S
  s    27)47 26!4$;5 $)D(!%NC$J%"'"'# .0ZZ^^2.d
  .0ZZ^^2.d
  27.>BGO4t;B L .2$d1 			@ASH # #%))3R# ".3+r   r  )*c                   z    \ rS rSr% \R
                  R                  SS5      S:H  r\\	S'   Sr
\\	S'   Sr\\	S'   S	rg
)eager_numericsi
  'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGr   r   division_roundingFdisable_ftzuse_pytorch_libdevicer   N)r   r   r   r   r   r   r   r.  r:  r;  r/  r0  r   r   r   r   r,  r,  
  sA    
 	

@#F#M t  K
 #(4'r   r,  %TORCHINDUCTOR_EMULATE_PRECISION_CASTSemulate_precision_casts(g  r   re  collections.abcr   typingr   r   r   r   r1   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   torch._inductor.choicesr   torch._inductor.cudagraph_utilsr   r   r   inplace_paddingcan_inplace_pad_graph_inputr:  r   r    r#   r&   r)   r,   r6   r;   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr   r?   r;  rF   rG   rH   rI   rK   rL   rM   rN   rP   rQ   rR   rD  r  rS   rV   rX   rZ   r\   r^   online_softmaxapply_gumbel_max_trickdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rm   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_first*epilogue_fusion_user_defined_triton_kernelpattern_matcherb2b_gemm_passrn   	_inductorcustom_graph_passCustomGraphPassTypero   rp   CustomPartitionerFnTyperq   rr   rs   rt   r|  ru   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrv   rw   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_mul keep_addmm_fused_for_half_dtypesuse_mixed_mmr~   r    reorder_for_compute_comm_overlapr   r   reorder_for_peak_memoryreorder_for_peak_memory_debugr   r   r   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmpipeline_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr(   inductor_default_autotune_warmupinductor_default_autotune_repr   r   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   r   r   uppermax_autotune_gemm_backendsr   r   max_autotune_conv_backendsr   r   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsrX  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr5   r3  layout_opt_defaultlayout_optimizationforce_layout_optimizationcache_sdpa_constraintkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr  r  fallback_randomalign_random_eager"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr  aggressive_fusionr
  r  enabled_metric_tablesr  r  score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemin_overlap_ratio)max_fusion_buffer_group_pairwise_attemptsr  max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingdo_not_emit_runtime_assertionscompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_args!combo_kernel_per_subkernel_blockscombo_kernels_pointwise_onlyjoint_graph_constant_foldingdebug_index_asserts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr  r  r  r&  r'  r(  r*  r,  _fuse_ddp_communication_fuse_ddp_bucket_sizer/  r0  partitioned_scatter_enabledr2  r4  r5  r7  r=  r]  rl  rk  rn  rp  rq  rr  rt  rv  rw  libfb.pyry  __package__get_dir_pathr  r  replaceseprx  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler~  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testingr  _profile_varprofile_bandwidthprofile_bandwidth_regexr  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r  r  r  r  r  enable_linear_binary_foldingr  r  r  r  r  r  r  r  r  r   rG  r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingr,  r2  modulesr   r   r   r   <module>r     s   	 
 $ 4 4  ( (  7?**..!@#F#M# Ctd{ CD4K Ctd{ CKtd{ K4$; F$+ Fd 
 
 JJNN7=D 
 	     8A{{3 
 '*JJNNA6J'  
 ?;1	  $) D ( &C%D td{ D ./ "4$;  )/MB)   " d ! &C%D td{ D .S-T td{ T $*VW d W 05 & 4 57 !4S> 6 &* d
 )  (1*  JJNN#>DKT K JJNN=sCsJ D  ::>>"<cBcI
D I JJNN4Y[[cc  d   >DK JJNN93?3F 
    zz~~:C@CGjjnn89S@JJNN=>#E   >DK JJNN4Y[[ccR
       **..!@#F#M 

<=D HJzz~~HWCD 
    *+   .3 *   TX 5??<<PP WTX EOO==QQ X TX u88PP W PT u88LL SPT 99MM T
 OS eoo77KK R 	 	;	<=:;	= 		" 	 	;	<=:;	= 		   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  # 
 $(    	+ c3h  FQAB P $)  *  (	;	<=:;	=*  &* d
 )   %  /0 & / >D w9: CNR .#0Dt0K R  4!  5; 7=1 :RV 2HcUCZ4H44O V  #G4% 
 17 w}- 6NR .#0Dt0K R   */ !4 /   
 &,?<& d  JJNN@ASH  JJNN;<C 
 zz~~:;sB (NOSVV  JJNN#DEL #&II5r:$   !$II2C8! t  .U-V d
 V JJNNDcJcQ " JJNNQSVW
 / JJNNBCHCO   JJNN2y{{CPST
   .2 * 1
 *, T#Y + ,. tCy - -2 #T 1 $47 d  !# DI "  ZZ^^.0A%' sTz  ,Q+R cDj R  ZZ^^.%'  DF::>>2ID%' (? @ 
 DF::>>2ID%' (? @ 
 " T ! " T !   ! T   =F g89 E  $  #! d ! JJNN.4; 4 
 "      JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  !JJNN=tD 
  %JJNN?F  
 JJNN<=D  JJNNJKsR ( $'JJNN<cB$  

@ @@ @ JJNN<cBcI 
BC BD B: : :6C 6D 6 *I 
 !& 1 1 1Ss JJNN68JKsR  JJNN+KSQUXX  JJNN8#>#E  ZZ^^$FLPSS  **..!@ASH
       !#*  %+>% T     &+ "  JJNNCDK ! BF %6!67$> E   ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN29;;CC  D   (,  + !#  JJNN<cBcI 
 $%      -/ ) ,0 cDj /          99=sCsJ  		78C? BJJNN#@!DE::>>"BCHCO  "&   %     "'        "# &* #  %* !$   $     1 11OUe>O>O5O [8$8  JJNNEsKsR #
 #' S4Z &%) d
 ) 2 
C 
 67 S 7 &. s - !'7:!    T       C Xi%83%> ? 
 ! D   JJNN>DK 
 +, "C + +. "C - ,0 !5 /: :
X4 X4v6T 6 B '0kkd7M7Ot O $*:=$ D  #)# C  ">!? $ ? $*
;$ D  39IL3 )4  JJNN>DK T 
 '-
>' t 
 * ;;
 $$&33[00bff=zJ   '33J?
    

<cBcI JJNN8#>#E        (- $  (          >DK # 
 $    !  +4+o t 5zz~~5r: B&  , 3"  (*zz~~"D( #* 
 JJNNHISP 0    8#>#E$ E %* T )  %  $ (-  ,
 $ t # $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 W 37 )4 6 46 tCc3h/0 5RZZ^^,MuUV 3 V %  %	S4Z 	 %+1% c  "C+ C+LU Up
t. t.t% %( (D W17 1 1< WT' T TF# F#T =BW89 A 7?g23 > "*WX ) "*WX )& &
 8K K\" T#Y "* T#Y D ) DI 
 UWhellELLI4OPQ V IIH#NRUU +
   84 84v ,( (2 JJNN:C@CG  
 ckk(+ ,K, $   s   /A"AL AL LALLAL