
    3j                   r   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJrJrJr  S SKJ r J!r!J"r"J#r#J$r$J%r%  S SK&J'r'  S SK(r)S SK*r)S SK+J,s  J-r.  S SK/J0r0  S SK)J1r1  S SK2J3r3  S SK4J5r5J6r7J	r8J,r9  S SK:J;r<  S SK=J>r>  S SK?J@r@  S SKAJBrBJCrCJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrKJLrL  S SKMJNrNJ6rO  S SKPJQrQ  S SKRJSrSJTrTJUrUJVrV  S SKWJXrXJYrYJZrZ  S SK[J\r\J]r]J^r^J_r_J`r`  S SKaJbrb  S SKcJdrdJere  S SKfJgrgJhrhJiriJjrjJkrkJlrl  S SKmJnrn  S SKoJprpJqrqJrrrJsrsJtrtJuruJvrvJwrwJxrxJyry  S SKzJ{r{  S S K|J}r}  S S!K~Jr  S S"KJr  S S#K*Jr  S S$KJrJr  S S%KJr  S S&KJr  S S'KJr  S(S)KJrJr  S(S*KJr  S(S+KJr  S(S,KJr  S-S.KJ6r6JrJr  S-S/KJrJr  S-S0KJr  S-S1KJr  S-S2KJr  S-S3KJr  S-S4KJrJr  S-S5KJr  S-S6KJr  S-S7KJrJr  S-S8KJr  S-S9KJr  S-S:K,JrJrJrJrJrJrJrJrJr  S-S;KJr  \(       a"  S S<KJrJrJr  S S=KfJr  S S>KJr  S S?KJr  S-S@KJr  \"" SA5      r\" SB5      r\(       d  \6GR                  " 5       (       d  SSC jrSSD jrOS SEKJrJr  \(       a/  S SFKJrJrJr  \\\   /\\)GR                     4   \-  \\   -  \-  r " SG SH\GR                  5      r\ " SI SJ5      5       rSSK jrSSL jr\" 5       r\GR                  r\GR                  r\GR                  r\	GR                  " \5      r\)GR                  GR                  \SM5      r\)GR                  GR                  \SN5      r\)GR                  GR                  \SO5      r\)GR                  GR                  \SP5      r\)GR                  GR                  \SQ5      rSSR jrSSS jrSST jrSSU jr\GR                  " S5      SSV j5       r\GR                  SSW j5       rSSX jr        SSY jr S     SS[ jjr      SS\ jr  S       SS] jjrSSS^ jjr   S         SS` jjrSSa jr    SSb jr      SSc jr S       SSd jjr S   SSe jjr\GR                  SSf j5       Gr  " Sg Sh\$SZSi9Gr " Sj Sk\#5      Gr S         SSl jjGr\" SmSn9 S         SSo jj5       Gr " Sp Sq5      Gr " Sr Ss\5      Gr " St SuG\5      Gr S           SSv jjGr      SSw jGr	 SSxSxSxSy.                   SSz jjjGr
SS{ jGr        SS| jGr S       SS} jjGrG\S4         SS~ jjGr\" S 5      Gr                  SS jGrSS jGrSS jGr        SS jGrSS jGr    SS jGr\" S_S9 " S S5      5       Gr    SS jGrG\SZ4               SS jjGrG\4         SS jjGr      SS jGrG\SSSZS4               SS jjGr      SS jGr\SS.             SS jjGr\SS.             SS jjGrSS jGr        SS jGr         SS jGr!SS jGr" SSS.         SS jjjGr# S SS jjGr$g)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)	dataclass)currentframe)count)
attrgetter)AnyTYPE_CHECKINGTypeVar)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)common)get_interface_for_device)wrap_compiler_debug)chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_inputs_devicesget_metrics_context	GmWrapperlazy_format_graph_codeset_feature_use)aot_autogradr   )!unwrap_tensor_subclass_parameters)aot_export_moduleGraphOutputNamemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexcudagraphs_logformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)CustomPartitionerFn)"create_mapping_pre_post_grad_nodessave_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)
	BoxedBoolcount_tangentsfresh_cacheget_all_devicesget_static_bw_input_idxs	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)is_opaque_type)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   distributed_autotunemetrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)Callable	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    [         R                  $ N)dynamo_utilsidentityattrs    T/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                     g r    )argskwargss     r   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                       \ rS rSrSrSrSrSrg)FxCompileMode   r   r]   rW   r   N)__name__
__module____qualname____firstlineno__NORMAL	SERIALIZE
SUBPROCESS__static_attributes__r   r   r   r   r      s    F IJr   r   c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg)	FxCompileConfig   r   modebool	use_asyncuse_progressiver   Nr   r   r   r   __annotations__r   r   r   r   r   r      s    
Or   r   c                    Sn [         R                  R                  U 5      nUc  [        [        R
                  SS5      $ SnSnUR                  5       R                  S5      (       a  SnUSS  nUR                  5       R                  S5      (       a  SnUSS  n UR                  5       n[        [        U   X#5      $ ! [         a    SS K
nUR                  " [        5      nUR                  S	UU S
R                  [        S [        R                    5       5      5      5        [         R                  R#                  U 5        [        [        R
                  SS5      s $ f = f)NTORCHINDUCTOR_FX_COMPILE_MODEFzprogressive+T   zasync+   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  8   #    U  H  n[        U5      v   M     g 7fr   )repr.0xs     r   	<genexpr>+_fx_compile_mode_default.<locals>.<genexpr>   s     H.GT!WW.G   )osenvirongetr   r   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__pop)namevaluer   r   r   logs         r   _fx_compile_mode_defaultr      s+   *DJJNN4 E}}33UEBBIO{{}//bc
{{}))	ab	C}U3YPP C)		LIIfHm.G.GHHI		
 	

t}33UEBBCs   "B> >BE E c                     SS0/$ )Nmax_autotuneTr   r   r   r   _get_progression_configsr      s     
 r   
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    [         R                  R                  R                  5       n[	        [        U 5      5      nU(       a  UR                  (       d  U$ UR                  R                  $ r   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                B   U R                   R                  SS9S   n/ n[        UR                  S   [        R
                  R                  5      (       d  UR                  S   nOUR                  nU H  n[        U[        R
                  R                  5      (       a^  UR                  R                  S5      =nb@  [        U[        R                  5      (       a!  UR                  UR                  5       5        M  UR                  S 5        M     X!R                  S'   g )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrt   output_stridesoutput_node_argsr   r   s         r   record_original_output_stridesr     s    ((%%%215KNk&&q)588==99&++A.&++"vuxx}}--..;3--!!#**,/ !!$' # 3A./r   c                    U R                   R                  S[        R                  R                  R
                  S9 H0  n[        XR                  S   R                  5      n[        U5        M2     [        U 5        g )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r   r   r     sa    ##599#9#9#I#I $  2yy|2231(;	 #2&r   c           	     "   U R                   R                  S[        R                  R                  R
                  S9 H  n[        XR                  S   R                  5      nUR                   R                  SS9 H}  n[        [        UR                  S   5      5       Vs/ s H@  n[        UR                  S   U   [        R                  R                  5      (       d  M>  UPMB     snUR                  S'   M     [        U5        M     g s  snf )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r   r   r   $  s    ##599#9#9#I#I $  2yy|223NN---:D !TYYq\!2353Cdiil3/? 35DII01 ; 	38<5s   !=D"Dc                 6    [         R                  " [        5      $ r   )dynamo_loggingget_step_loggerr   r   r   r   _step_loggerr   4  s    ))#..r   c                 $   [         R                  R                  5       (       am  [         R                  R                  R                  R
                  S:w  a:  [         R                  R                  5       S:  a  [        R                  " S5        g g g g )Ntf32)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmulfp32_precisionget_device_capabilitywarningswarnr   r   r   _warn_tf32_disabledr	  9  sl     	

!!NN&&55?JJ,,.&8d	
 9 @ 	"r   c           
       ^ [        U R                  SS9 VVs/ s H  u  p#UPM	     snn5      mTR                  [        U R                  SS9 VVs/ s H  u  p#UPM	     snn5      5        SU4S jjnUR                  R
                   GHr  nUR                  S:X  d  M  UR                  nUR                  S5      (       d  UR                  S5      (       d  MP  [        X5      (       d  Mb  [        U5      " U5      n[        U5      " U 5      n[        U[        5      (       a1  [        U[        5      (       a  UR                  UR                  L a  M  ORUR                  UR                  :X  a8  UR                  UR                  :X  a  [         R"                  " Xx5      (       a  GM  UR                  S5      (       a  SOSn	U" UR                  U	5      n
U	 U
 3nXl        [%        XU5        TR'                  U5        GMu     gs  snnf s  snnf )	a  
In aot_export_module (make_fx), we create get_attr nodes with name prefix
"_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
torch/fx/_symbolic_trace.py

However, this might result in name collision if the original mod already
has a different buffer with the same name.

We resolve this potential name collision here by changing the target name
with a new number post fix.
Fremove_duplicatec                r  > SnU R                    H  nUR                  S:X  d  M  UR                  R                  U5      (       d  M7  [	        UR                  5      [	        U5      :  d  M[  UR                  R                  U5      S   nUR                  5       (       d  M  [        U[        U5      5      nM     T Hu  nUR                  U5      (       d  M  [	        U5      [	        U5      :  d  M5  UR                  U5      S   nUR                  5       (       d  M`  [        U[        U5      5      nMw     US-   $ )Nr   get_attrr]   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r   find_smallest_i0_resolve_name_collision.<locals>.find_smallest_iZ  s    KKDww*$)?)?)G)Gt{{#c&k1#{{008<H''))3x=1   !C~~f%%s8c&k)"yy04H''))3x=1 ! 1ur   r  _tensor_constant_torchbind_objN)r   zfx.Graphr  strreturnr  )rV   named_parametersupdatenamed_buffersr   r  r   r   r   hasattrr   r   rM   real_objdevicedtyper   equalsetattradd)modr   r   r   r  r   target_name	gm_targetmodel_targetr  new_idnew_target_namer  s               @r   _resolve_name_collisionr0  F  s    "33U3KLK)$KLM #*;*;U*;*ST*SYTD*STU  77j ++K))" !,,-=>>3,,";/3I%k237L)%566|-=>>!**l.C.CC  L$7$77OO|'9'99KK	88  ))*<== #% 
 %RXXv6F!'1O)KB3o.I - 	M Us   H
Hc                   SSK JnJn  [        X5        0 nU R	                  SS9 H  u  pgXuU'   U" UUUUR
                  S9  M     U R                  SS9 H  u  phXU'   U" UUUUR                  S9  M     UR                  R                  SS9n	/ n
U	 H  nUR                  nXR                  ;   a"  UR                  U   nU
R                  U5        M@  XR                  ;   aE  UR                  U   nU
R                  U5        [        X^   5      UR                  [!        U5      '   M  XR"                  ;   d   eU
R                  S 5        M     SSKJn  [)        UR                  R+                  5       R,                  S   5      n/ nUR.                  nUR0                  nUR2                  n[5        U5       Hl  u  nnS nU[7        U5      [7        U5      -   [7        U5      -   :  a,  [9        UR                  5      nUU;   a  UU   nOUU;   a  UU   nUR                  U5        Mn     U" UU
U[:        R<                  " 5       S 5      n[?        URA                  5       5      UR                  S	'   U$ )
Nr   )_assign_attr	_AttrKindFr  )	attr_kindplaceholderr   )_unliftmutated_named_buffers)!torch.export.unflattenr2  r3  r0  r   	PARAMETERr"  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersro   r   rq   user_inputstorch.export._unliftr6  tuplert   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   r.   pytreetreespec_leafrV   values)r*  r   graph_signaturer2  r3  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer6  outputsmutated_outputsbuffer_mutationsuser_input_mutationsrB  r   outr   unlifted_gms                            r   _unlift_graphrV    sq    ?C$IKJ++U+C 4))		
 D ))5)A!4&&		
 B ++}+=&(M "II	<<<,AA)LN  0;;;);;IFK  -&z'>? GG4[AB  ; ;;;;  & " -).rxx/C/C/E/J/J1/M)NGO&88*@@#11Mg&S-1%&-A)BBSEWWW"388,D''(.--,T2u% ' 
K 1;;K;R;R;T0UK,-r   Fc              #  &  #    [        S U R                  R                  SS9 5       5      n[        5       nU R                  5        HH  u  pEXB;   d  M  [	        U[
        R                  R                  5      (       d  M7  UR                  U5        MJ     U(       ak  U R                  R                  S[
        R                  R                  R                  S9 H+  nUR                  UR                  S   R                  5        M-     U S h  vN   g  N7f)Nc              3  8   #    U  H  oR                   v   M     g 7fr   )r   r   s     r   r   &_get_subgraph_names.<locals>.<genexpr>  s      5<Q<r   r  r   r   r   r   )rV   r   r   named_childrenr   r   r   rQ   r)  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r   _get_subgraph_namesra    s      +5 5((---<5 + *4$&$5$5$7 
 +
%((..1
 1
 !!*- %8 HH''uyy'='='M'M ( 
D %%diil&9&9:

 !  s   AD'D=BD	D
Dc                P   [        SSSS9   [        R                  (       d  U sS S S 5        $ [        R                  n[        R                  n[        U 5       H&  n[        X5      n[        US5      n[        XU5        M(     [        XX#5      sS S S 5        $ ! , (       d  f       g = f)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )
r$   r   use_pre_grad_passesadd_pre_grad_passesremove_pre_grad_passesra  r   rc  r(  rh   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r   rc  rc    s     
$"!8

 ))
 
 //
5504Mr1H5hCLB|4	 5
 r:M
 
 
s   BA B
B%c                P  ^ ^^ SU UU4S jjn[        SSSS9   [        R                  (       d  T sS S S 5        $ [        [	        T T5      5      nU H  nU" U5        M     [        T T5      n[	        USS9 H  nXT;  d  M
  U" U5        M     UsS S S 5        $ ! , (       d  f       g = f)	Nc                P   > [        TU 5      n[        UTT5      n[        TX5        g r   )r   _recursive_joint_graph_passesr(  )rn  r   ro  r   input_devicer\  s      r   _run_on_sub_graph_module?_recursive_joint_graph_passes.<locals>._run_on_sub_graph_module  s-    2}-4*L
 	M0r   rr  Tjoint_graph_pass_time_usre  F)r\  )rn  r  r  None)r$   r   use_joint_graph_passesrV   ra  re   )r   r\  rs  rt  old_subgraph_namesrn  out_gms   ```    r   rr  rr    s    
1 1 
'"!;

 ,,
 
 ((;B@T(UV/M$]3 0 $B5 1eTM6(7 U 7
 
 
s   BAB B
B%c                    [        SSSS9   [        R                  (       d
   S S S 5        g [        U 5       H  n[	        X5      n[        X15        M     [        X5        S S S 5        g ! , (       d  f       g = f)N_recursive_post_grad_passesTpost_grad_pass_time_usre  )r$   r   use_post_grad_passesra  r   r|  rf   )r   is_inferencern  r   s       r   r|  r|  9  sh    	%"!9

 **
 
 14Mr1H'? 5 	*
 
 
s   A(3A((
A6Tc                   SSK JnJnJnJnJn  U" XX#5      n	Uc  U	" 5       OSn
[        [        U	R                  R                  5      S   R                  S   5       VVs0 s H  u  pUR                  U_M     nnn/ n/ n0 nU R                  R                   H^  nUR                  U;   a  UR                  U5        M&  UR                  U   U:X  d  M;  UR                  S:w  d  MM  UR                  U5        M`     U HC  nSUR                  -   nU" U UUc  XUR                        OSU5        UUR                     UU'   ME     USSS2    Hb  nUR                  (       a3  UR                   H!  nUR                  U   U:X  a  M   SU S35       e   MG  U R                  R!                  U5        Md     U R#                  5         U	U4$ s  snnf )	a  
This function takes an GraphModule input "gm".
The gm will be split into 2 components,
  1) const_gm, which consists the subgraph of gm that can be constant folded.
  2) gm (being inplace modified,) which returns the graph after constant folding.

If an additional "lifted_constants" argument is passed in, we will assume the gm has
been lifted and run the transformation accordingly.

When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
the nodes for which the callback returns True.

const_output_index is a mapping of corresponding node name from gm to the
output index of const_gm.
Returns (const_gm, const_output_index)
r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr  r5  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingr  r  r  r  r  rC  r?  r   r  r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnr  r  r  r  r  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r   split_const_gmr  H  s   ,  *
3H "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"T"T"T   MO99%""4(YYx $44M9Q  &	   )DII5" )0 49956		
 .;499-E>*   dd#::ZZvvh':5VvEU7VV5   HH% $ LLN'''Es   Gc                n   [         R                  R                  n[        UR                  R
                  UR                  R
                  UR                  R
                  UR                  R
                  /5      nU H  nU R                  R                  SUS9 H  n[        UR                  R                  SS 5      [         R                  5      (       d  M>  UR                  S   R                  [         R                   :X  d  Mk  UR                  S   R"                  R$                  S:X  d  M      g   M     g)Nr   r   r   r  TF)r   r   atenrV   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r&  float32r%  type)r   r  tf32_opsr   r   s        r   is_tf32_warning_applicabler    s    99>>DGGOOJJHHLL  		
H HH''?6'JD499==5u||DDIIe$**emm;IIe$++00F: K  r   c                   [        S U  5       5      n[        R                  (       aE  [        R                  (       a0  U(       d)  [        R                  S5        [        R                  " SS9$ [        R                  R                  (       a)  [        R                  S5        [        R                  " SS9$ [        R                  " 5       $ )z
For CPU backend, enable comprehensive padding causes some unit tests
fail due to changing number of generated kernels. Skip for now.
c              3     #    U  HE  n[        U[        R                  5      (       d  M$  [        UR                  R
                  5      v   MG     g 7fr   )r   r   r   rI   r%  r  )r   ts     r   r   6maybe_disable_comprehensive_padding.<locals>.<genexpr>  s3      '5!Au||9Tqxx}}~s
   #A&Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)rk  has_gpus     r   #maybe_disable_comprehensive_paddingr    s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                p    U (       d  U(       a  [         R                  " SS9$ [        R                  " 5       $ )z@
graph partition does not support cpp_wrapper and aot_mode yet.
F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r   maybe_disable_graph_partitionr    s'     h||E22%%''r   c                   [        5          [        U5      nU(       d4  [        R                  R	                  SS9n[        XS9R                  " U6   O^U(       d  [        R                  " 5       O [        R                  R                  USS5      nU   [        XS9R                  " U6   SSS5        SSS5        U$ ! , (       d  f       N= f! , (       d  f       W$ = f)zq
If we can not detect fake mode from the context of inputs, create one.

The created fake mode will be returned.
Tallow_non_fake_inputs)r   r  N)r   r#   r   _subclassesFakeTensorModerT   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   rk  force_allow_non_fake_inputs	fake_modectxs        r   fake_tensor_propr    s     
"	#$^4	))88t8TI2.88.I 3 &&(ZZ&&y2I4P 
 r2PP#  
$    
$	#  s$   BCB:(C:
C	C
Cc                    [         R                  " U 5         [         R                  " 5       sS S S 5        $ ! , (       d  f       g = fr   )r   r  get_config_copy)config_patchess    r   get_patched_config_dictr    s(     
n	%%%' 
&	%	%s	   6
Ac               #     #    [         R                  (       a   [        [        5       SS9   S v   S S S 5        g S v   g ! , (       d  f       g = f7f)NF)dirdelete)r   force_disable_cachesrE   rB   r   r   r   with_fresh_cache_if_configr    s4     "" Y[7 87 	 87s   (A=A
AAc                      \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   Srg)_CompileFxKwargsi  zBoxedBool | None
cudagraphsSequence[int]static_input_idxsr   is_backwardz
int | Nonegraph_idr  r  r  bool | None
layout_optz.Callable[[list[ExternKernelNode]], Any] | Noneextern_node_serializerzBoxedDeviceIndex | Noneboxed_forward_device_index
fx_wrapper,Callable[..., dict[Any, Callable[..., Any]]]get_decomp_fnr   Nr   r   r   r   r  r    sI      $$NJJ 77??r   r  )totalc                  6    \ rS rSr S         SS jjrSrg)_CompileFxCallablei	  Nc                    g r   r   )selfr   rk  compile_region_namer   s        r   __call___CompileFxCallable.__call__
  s     r   r   r   
r   rQ   rk  Sequence[InputType]r  
str | Noner   Unpack[_CompileFxKwargs]r  rA   )r   r   r   r   r  r   r   r   r   r  r  	  sA    
 +/	 , (	
 + 
 r   r  c                   UR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  S	S5        UR                  S
S 5        UR                  SS 5        UR                  SS 5        [        R                  " 5        nUR                  [        R
                  R                  R                  5       5        UR                  [        [        R                  5      5        UR                  [        R                  " SSSSSSS95        UR                  [        5       5        UR                  [        5       5        [        R                   " SUS   S9  [#        [$        SS9" U U4SU0UD6sS S S 5        $ ! , (       d  f       g = f)Nr  r  r   r  Fr  r  r  r  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namerf  log_waitcounterwaitcounter_name_overriderg  )r  inductor)compiler_namer  )
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrZ   dynamo_configuse_lazy_graph_moduler   r$   r  rb   r!   pt2_compiler   _compile_fx_inner)r   rk  r  r   stacks        r   r  r    s    lD)
)2.
mU+
j$'
mU+
lE*
ne,
2D9
lD)
.5 
			5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 ##4JO
 !4
 	
' 
 		s   
C)F==
Gzcompilation time (in seconds)r   c                  ^,^- [         R                  nSSKJn  U" 5       (       a&  SSKJn  UR                  5       nUR                  5         [        R                  R                  R                  R                  5         [        R                  " U R                  5      S:X  a  U(       d  [        R                  R                   R"                  (       d|  SSKJn  SSKJn	  U	R-                  U 5        [        R.                  R0                  R3                  5       n
[4        R6                  " SSU
0UR8                  S9  [;        U R<                  5      $ UR?                  S	S
5      n[@        RC                  SU5        [E        X5      n[G        [I        [K        [M        U R                  RN                  5      5      5      RP                  S   [R        [T        45      (       d   SU R                   35       eURW                  S5      c&  [Y        [         RZ                  R\                  5      US'   [         R^                  (       a  [a        U U4SU0UD6  [b        Rb                  " 5       n[e        5       n[g        5         [i        S S [k        U 5       5        5       5      n[m        SSSS9   [         Rn                  (       + =(       a`    [         Rp                  =(       d    U=(       a@    U(       + =(       a2    U=(       a)    [        R                  R                   R"                  (       + n[         Rp                  nUn[s        SU5        [t        RC                  SUUUU[         Rn                  5        [w        U5       H\  u  nn[G        U[        Rx                  5      (       d  M'  [{        UR|                  R~                  5      (       d  MM  UU;   d  MU  SUl@        M^     SnSnSm,Sn[        U 5      n[b        R                  " 5       nU(       a  [        R                  " XX<U5      u  nm,Ubz  Uu  nn[t        RC                  SU5        U(       a*  [        R                  " 5       n[t        RC                  S5        [        R                  " UUUUUURW                  SS5      US9u  nm,O[t        RC                  S5        [        R                  R                   R"                  (       av  Ub   eT,b   e[        R                  " 5          [        U UU4SU0UD6nUc   e[        R                  " 5       u  nnUR                  U5         [        R                  " 5         GOT,b	  T,S   S:X  aC  Ub   e[t        RC                  ST,b  T,RW                  S S!5      OS"5         [        U UU4SU0UD6nGORT,S   S#:X  Ga
  Ub   eUc   e[t        RC                  S$5        [        R                  " 5          [        U UU4SU0UD6nUc   e[b        R                  " 5       U-
  UlT        Uu  nnUUlU        UUlV        [        R                  " 5       u  nnUR                  U5         [        R                  " 5         Ub  [        U5      T,S%'   UR                  T,S&'   [t        RC                  S'U5        [        R                  " UUUUU5        O>T,S   S(:X  d   eUc   eUc   eUu  nn[t        RC                  S)U5        UUlU        UUlV        Uc   eUn [G        U [        5      (       a  UU lZ        T,b  T,S   OS*m-[4        R                  " S+T- 3T,=(       d    0 US,9  [4        R                  " S-T-UT,(       a  T,RW                  S.5      OST,(       a  T,RW                  S/5      OST,(       a  T,RW                  S 5      OS0UUS19  T,b  [        S2U-4S3 jU,4S4 jS59  U R                  UUU5        [         R                  n!U!b  U!R                  U 5      n SSS5        [t        RC                  S6[b        Rb                  " 5       U-
  5        [t        R                  [        R                  5      (       Ga  / n"[        S7   R                  5        GH	  u  nn#UR                  S85      n$[        U$5      S9:  a  U"R                  US:S;S;S;U#/5        M@  [        U$5      S<:  a  S8R                  U$SS= 5      OS8R                  U$SS> 5      n%U%R                  S?5      n&U&(       aF  [        U$5      S<:  a7  U$S=S u  n'n(n)n*S8R                  U$SS= 5      n%U"R                  U%U'U(U)U*U#/5        M  U$S>S u  n(n)n*S8R                  U$SS> 5      n%U"R                  U%S:U(U)U*U#/5        GM     [t        R                  S@5        [t        R                  SAR                  SBSCSDSESFSG5      5        [t        R                  SH5        U" H:  n+[t        R                  SAR                  " U+6 5        [t        R                  SH5        M<     [        R                  R                  R                  R                  5         [        5       " [        R                  SIUS   (       a  SJOSK SLUSM    35        W $ ! [        [        4 a    e [         a3  n[        U[        5       5      R                  UR                  5      SeSnAff = f! [        R                  " 5         f = f! [         a3  n[        U[        5       5      R                  UR                  5      SeSnAff = f! [        [        4 a    e [         a3  n[        U[        5       5      R                  UR                  5      SeSnAff = f! [        R                  " 5         f = f! , (       d  f       GN= f)Nz
Inductor API that compiles a single graph.

If you change the argument list for this function, make sure you
also update the call to save_args_for_compile_fx_inner below accordingly.
r   )use_pipelined_autotuning)AutotuneProcessPool)CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nr  c              3  D   #    U  H  nUc  M  UR                   v   M     g 7fr   )supports_caching)r   backends     r   r   $_compile_fx_inner.<locals>.<genexpr>  s(      	#
G  	!  
s     c              3     #    U  H8  n[        UR                  [        R                  [        R                  5      v   M:     g 7fr   )r`   r  r   r  r  r   r%  s     r   r   r    s<      
 . +V//1B1B  .s   A Afx_codegen_and_compileT)rf  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r   time_nsr  r  
componentszcache not enabled)r  cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                    > ST  3SS.$ )Nr  jsonr   encodingr   )r  s   r   <lambda>#_compile_fx_inner.<locals>.<lambda>t  s    -k]; &%r   c                 0   > [         R                  " T 5      $ r   )r  dumps)
cache_infos   r   r  r   x  s    4::j#9r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsaten_mm_info_   -?   )r  r  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )nrw   aot_compilation torch._inductor.autotune_processr  r  get_instancewarm_upr   	_inductorasync_compileCompiledTritonKernelscache_clearr   count_callsr   
_functorchr   bundled_autograd_cachetorch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr!   log_instant_eventPT2_COMPILEr/   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr  r   r?  r   r   rC   tritonr  	save_argsr;   timerK   ra   allrF   r$   r  fx_graph_cacher*   r   rC  r   rI   r%  r  _is_inductor_staticr>   r  r2   prepare_keyget_remote_cacheload_with_keyrm   begin_compiler  collectset_triton_bundlerX   rY   	Exceptionrd   r	   with_traceback__traceback__end_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linesr  _save_graphr=   r  instanttry_add_pt2_compilerO   post_compilecudagraph_policywrap_outputisEnabledForr   INFOr"   itemsr  r   r   r   endswithr  formatr   ).r   rk  r  graph_kwargsr  r  r  pool_instancer  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher
  
start_timer  debug_linestriton_bundler  e	cache_keycompiled_graphpolicymm_table_datar   partsr   
is_batchedbatchmr  krowr#  r  s.                                               @@r   r  r  C  s
    &&HI!!H+88: 
OO!!77CCE 	  *a/  ''>> 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-nPOd4 89:??BUDMRR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 !4	
 		
 IIKE<> " 	#
 *"-	
	# 	  
 d
 +++ C&&?*?CC )C $$++BBB 	 %%&
I.		f''	
 ".1HAu5%,,//5<<,,--**,0) 2 04
226	 \\^
%1%=%=L6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<""99$,,,%%%
 ''),$:"#% )<	%
 #%! )444 "))+!'!33MB ))+
 :m#<#H$,,,II, "- NN#8)DE$:"#% )<	%
 #%! &&0$,,,'''IIDE''),$:"#% )<	%
 #%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,*no661DN. *4)?J}%Z 	 	""k]+%2	
 	..#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|L((#//?NO
R II5tyy{U7JK %%">288:JCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A) ;, 	78AHHS#sG	

 	 CHHJQQSVWXHHY ! 
OO!!77CCEN'&}5;:
F Gj)*	, C %i0  #A|~6EEOO
 ))+.  #A|~6EEOO: %i0  #A|~6EEOO
 ))+u
 
s   C1m9"m98m9 D/m90>i%/Am9kA m9A.lF#m9%j2?.j--j22j55km9
l.llm9m).mmmm66m99
nc                  .    \ rS rSr% SrS\S'   SS jrSrg)	_FxCompileStati  r   r  codegen_and_compilec                     SU R                    3$ )Nzcodegen_and_compile: )r  )r  s    r   __repr___FxCompileStat.__repr__  s    &t'?'?&@AAr   r   N)r  r  )r   r   r   r   r  r   r  r   r   r   r   r  r    s      Br   r  c                  ~    \ rS rSr% Sr\" \5      rS\S'   Sr	S\S'   \
          SS j5       r\SS	 j5       rS
rg)	FxCompilei  zU
An FxCompile represents a mechanism that can turn a GraphModule into an
OutputCode.
z%dict[type[FxCompile], _FxCompileStat]_compile_statsNr  r  c                    g r   r   )r  r   rk  rq  ro  s        r   r  FxCompile.codegen_and_compile  s     r   c                8    U R                   R                  5         g r   )r  clear)clss    r   _reset_statsFxCompile._reset_stats  s      "r   r   
r   rQ   rk  r  rq  r  ro  r  r  rA   r  rw  )r   r   r   r   __doc__r   r  r  r   r  r   r  classmethodr  r   r   r   r   r  r    s}     =H<WN9W&**
  , '	
 ' 
  # #r   r  c                  <    \ rS rSr\          SS j5       rSrg)_InProcessFxCompilei  c                8#  ^^4^5^6^7^8 SU;   a  US   c   eUS   nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      n	UR                  S	S5      n
[        R                  nUR                  S
S5      nUR                  SS5      nUR                  S[        5      n[	        S5      R                  5          [        R                  " 5          [        R                  =nb,  SSK
n[        R                  SU5        UR                  " U5        [        T5      (       a
  [        5         [         S   R#                  5       n[$        R&                  " [)        [$        R*                  " 5       S5      5        [-        5       " [.        R0                  SU(       a  SOS SU 35        [2        R4                  " 5       n[6        R8                  R:                  R<                  R?                  UTUSSS9  URA                  5       m8[C        SS U84S jS9  [        RD                  RG                  TU5        TRH                  nUc  [K        U5      n[M        T5        [O        SSS9   [6        RP                  " 5          [S        TU5      nSSS5        SSS5        [U        T5        [C        SS U4S jS9  [        RV                  " W5         [Y        T5      nU   [[        TUS 9  SSS5        [        RD                  R]                  TU5        [^        RE                  S![a        S"TSSSS#95        TRc                  SSSSS$9m5[C        SS% U54S& jS9  [        Rd                  Rf                  S:w  a~  [6        Rh                  Rj                  Rm                  TRn                  5      n[q        [6        Rr                  RD                  Rt                  U5      [6        Rr                  RD                  l;        [y        5       nUR{                  5       (       a,  [         S'   R}                  5       n[~        R                  " SUS(9  [        R                  " 5       (       a   [        S)[        [        5       5      0S*9  SSS5        [        RV                  " U5         [        U5         [        X5         SnSnSnSnU(       a  [        R                  R                  (       a  [        TS, S-9u  nn[        U/ UUU	UUUUSU
US.9n[        R                  " U5         [        R                  " / 5         U	(       d   S/5       eUR                  5         UR                  5       u  nnSSS5        SSS5        [        T4UUUU	UUUUUU(       a  UR                  OSU(       a  UR                  OSUUU
US0.6n[        R                  " 5       nUR                  5         [        R                  " U5         [        R                  " / 5         [        R                  " 5          UR                  " U6   / n UR                  b  [        5       m7UR                   H  n![        U![        5      (       ay  U!R                  5       (       ad  [        [        U!R                  5       5      5      S:X  a>  U R                  [        U74S1 jU!R                  5       R                   5       5      5        M  U R                  S5        M     [        U5        Sn"[O        S2SS9   UR                  (       ag  UR                  (       aV  UR                  (       a   eUR                  5       S   R                  n#[        RD                  " S3U#Rc                  SS495        GOUR                  (       Ga`  S5S6KiJjn$  UR                  (       d   S/5       eUR                  5       u  n%n&[        RD                  " S7U%R                  5        U&R                  (       a!  [        RD                  " S8U&R                  5        Sn'[        R                  (       a6  UR                  [        R                  5      n'[        RD                  " S9U'5        [O        S:SS9   U$R                  UU%R                  U&R                  U'UR                  / [        R                  UR                  R                  U(       a  UR                  R                  O/ -   5      QS;9n#SSS5        O)UR                  5       n(U(R                  n#[        U(S<S5      n"SSS5        Sm6Sm4[        Rd                  Rf                  S:w  a  [        R                  " [6        Rr                  RD                  R                  5       5      m6[        R                  " [6        Rr                  RD                  R                  5      m4[C        SS= U64S> jS9  [C        SS? U44S@ jS9  T4(       a1  [y        5       nUR{                  5       (       a  UR                  SAT45        Sn)[        R                  [.        R0                  5      (       a{  UR                  5       u  n*n+n)[        =R                  U*-  sl~        [        =R                  U)-  sl        [        =GR                   U+-  sl        [        GR                  SBU*U+U)SC.5        [        GR                  (       a>  UR                  5       u    n,n)[6        Rr                  RD                  GR                  U)5        [6        Rr                  RD                  GR	                  UGR
                  GR                  5        U(       Gaz  [        GR                  GR                  (       GaX  [        GR                  (       GdA  [        Rn                  GR                  (       Gd   [6        Rr                  GR                  GR                  " U6 (       a  Sn-TRn                  GR                   H  n.U.GR                  R                  SDS5      n/U.GR                  SE:X  dP  [        U/[6        GR                  5      (       a0  [6        Rr                  GR                  GR                  U/5      (       d  M  U.GR                  R                  SFS5      =n-(       d  M    O   SGn0U-(       a	  U0 SHU- SI3n0OU0 SI3n0U0[        Rn                  l        U(       a  [        GR                  (       d  [        Rn                  GR                  (       dd  G[!        T5      n1U1(       aQ  SJU1GR"                   3n0U1GR                  R                  SFS5      =n-(       a  U0 SHU- SI3n0U0[        Rn                  l        [        R                  (       a  [        W#[        G[$        [6        Rh                  GR&                  45      (       d   G[)        U#5      5       eG[+        U#UR                  SK9sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ U(       aZ  [        Rn                  GR                  (       d:  SSLKJn2  U2" [        Rn                  GR0                  5      [        Rn                  l        U GR2                  G[)        U 5         =GR4                  S5-  sl        [6        Rr                  RD                  GR6                  (       a  [6        Rr                  RD                  GR8                  bp  [        [6        GR:                  GR<                  GR?                  5       5      n3UR                  S5      nUb(  U3[6        Rr                  RD                  GR8                  U'   G[A        W#UTU [        Rn                  GR                  UGRC                  5       [         S   U-
  UUUU GRD                  UUT8T5U"T6T45      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNT= f! [         a    [        R                  S+5         GNf = f! , (       d  f       GN= f! , (       d  f       GN/= f! , (       d  f       GN9= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f)MzC
Generates the OutputCode from the GraphModule and example_inputs.
r  Nr  r   r  Fr  r  r  r  r  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r5  r6  r7  )save_dirr  c                     SSS.$ )Nfx_graph_runnablestringr  r   r   r   r   r  9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (%r   c                    > T $ r   r   )runnable_graph_strs   r   r  r    s    #5r   r$  additional_fake_tensor_propTrf  c                     SSS.$ )Nbefore_post_grad_graphr  r  r   r   r   r   r  r  M  s    4 (%r   c                 &   > T R                  SSSS9$ NFTprint_outputinclude_strideinclude_deviceprint_readabler   s   r   r  r  Q  s    2#4#4!&tD $5 $r   r  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     SSS.$ )Ninductor_post_grad_graphr  r  r   r   r   r   r  r  s  s     :$,)r   c                    > T $ r   r   )inductor_post_grad_graph_strs   r   r  r  w  s    'Cr   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                   U R                   S:H  =(       am    [        U R                  [        5      =(       aL    U R                  R	                  S5      =(       d*    [        U R
                  R                  SS 5      [        5      $ )Nr  r  r   )r   r   r   r  r   r   r   rM   )r   s    r   r  r    s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	;r   )r  )rk  	shape_envr  r  r  r  r  r  is_const_graphr  r  z"AOT mode only supports C++ wrapper)rk  r  r  r  r  r  r  r  r  const_wrapper_codeconst_kernel_codeconst_modulerq  r  r  c              3  F   >#    U  H  nTR                  U5      v   M     g 7fr   )doprint)r   sps     r   r   :_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>  s     )X@W1!))A,,@Ws   !zGraphLowering.compile_to_fnzOutput graph module: 
%s)r  r]   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerc                     SSS.$ )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r   r  r  O  s    (T,21r   c                    > T $ r   r   )r  s   r   r  r  S  s    /Yr   c                     SSS.$ )N0inductor_provenance_tracking_kernel_stack_tracesr  r  r   r   r   r   r  r  W  s    (Z,21r   c                    > T $ r   r   )inductor_kernel_stack_trace_strs   r   r  r  [  s    /Nr   inductor_provenancezGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r5  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op )filenamer  ) check_lowering_disable_cudagraph)r   rw   r8  rc   rU   guardr   preserve_rng_stater   sleep_sec_TESTING_ONLYrS  r   warningsleepr  r	  r"   copysyssetrecursionlimitr  getrecursionlimitr   r   rk  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerO   rL  fx_graphr  rv   rg   r$   no_gradr  r   set_fake_modeget_cuda_device_contextr|  fx_graph_transformedpost_grad_graphs_logr)   r  traceprovenance_tracking_levelr   	tracebackget_graph_provenance_jsonr   r:   r<  _pre_grad_graph_id _inductor_post_to_pre_grad_nodesr'   in_progressr  r!   compilation_metric	is_fbcoder   r  r  r]  r  r  r  r  r  ri   set_graph_handlerset_extern_kernel_nodesruncodegen_with_cpp_wrapperr   r_   CachedMetricsHelperfreeze_runtime_assertsr^   graph_contextgraph_outputsrS   r   rk   has_tensor_outputr   rR   
get_strider   r?  
get_layoutr   _check_triton_bf16_supportr  r  r  codegenr   r3   	codecacher  extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   r  r"  dump_inductor_provenance_info_inductor_kernel_stack_trace
add_to_setinductor_metrics_logrj  count_bytesr  r  r  r  log_tlparselog_runtime_and_tensor_metalog_collective_schedule	schedulerr  rQ  cudagraph_skip_dynamic_graphsr  disable_cudagraphs_reasonr   any_is_symbolicr   r   r   rr   r   r   rQ   r  r<   torch._inductor.cudagraph_utilsr  device_node_mappingr  r  RECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSr   rF  rG  r=   
get_deltasr  )9r  r   rk  rq  ro  r  r  r  r  r  r  r  r  r  r  	sleep_secrS  inductor_countersfdr  r  cuda_contextprovenance_tracking_jsonmetrics_contextr  r  const_graphr  r  r  r   metrics_helperr   rT  compiled_fn_runnercompiled_fnr  r  kernel_codeserialized_extern_kernel_nodescompiled_moduler  	num_bytesr  r(  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  r  r  s9    `                                                  @@@@@r   r  '_InProcessFxCompile.codegen_and_compile  sq     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB+//
DA(,,]EB'++L%@
**)--neD5t< 	 GSFVFV0G
 JKQQS++-#:::	GI9 

9%)"--#% ( 4 9 9 ; !!#c&;&;&=t"DEN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 I 1.A	$ B-T ]]_ 0^ DI % 6b9
 	 +6r:!/N ",,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<99Q>**DDRXXN - ;!OO11DD4 OO))J #6"7"..00'/'>'D'D'F$&99"&9I ##%%	A, -s3J3L/M+o ,D 	*3NC-kD%)""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#-&3#K ++K811"5*P,PP{#)'@@B >*,=	 6 9 &
 $2'% +%+A!- +'94F*00D 4E)//$!,$3)"//2 ")!<!<!> ,,.''.--b1(668II~.NPN**6 +,#(#6#6C *3 7 7$'$9$9$;$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ; $7 /u5 *.&%5T !>>e.>.>','8'88#8*/--/!*<*?*?K+11 ; + : : : N
 #^^^B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B: 44$)$@$@AVAV$W !? !0 5 5$J$B!"
 ". 9QU" />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.yB BF>6:3||==BEIZZ!OO11OOQFB ;?**!OO11NN;7 )&) (Z )&) (O ;.A.CO.::<< / : :$9$C!"
 %)M+88FFCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> ))).3.?.?.A+1m--II-X OO))AA%//BWBWX #"MMGGG & 6 6 6 ! A A A!OO11AA>R&*$&HHNND'+yy}}UD'AH $= 8'1(ELL'I'I','<'<'L'LX'V'V (.2iimmM4.PP{P % %3 #|&)0	k]"&MG)0	nG<C9
 # & 6 6 6 ! A A A.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG= (()' $(<(<=    -  ,	-  
  ,%0e>O>O [ 98 21 /.W ED DC +* .- TSD "!''*K*K*K = ! ; ; 9 ''T
3GG1LG --DDD!OO11CCO%(!MM88KKM&
 $0#3#3J#?#/ !+ "OO11CCHM +#&99&113 ,/@@"&)00$'*4*B7'g 98 21 /.W ED DC +* .- TSX %_ 8 "\r % A $?@Ay ,+P 65 98V" "I ; 988 211 /..W EDD DCC +** .-- TSSss  AF-FAE18A@0A@	A@0#9AE1AA:*AA	5E,AA:"AA>AE1AE(AD=	4A'AD#AB22AB$AB,BAD#/AD	AC/C2ACE&AC4A<AB002AC"M=AC#EAC$	AC/-	AD	6	AD#?	AD=		AE	AE1	AF-F(AC	AC/	AD	'	AD#0	AD=	9	AE@	AE1@	AF@
A@-@(A@0@0
A@?@:AE1A
AAAAA:AAA7	A3AA:A6AA7	A7AA:A:
AB	BAE1B
ABBABB
AB-B(AD#B0
AB?B:ACC
ACCACC
AC"CAC/C&	AD	C/
AC=C9AD	D 	AD#D	
ADDAD#D	AD=	D#
AD1D-AD=	D4	AED=
AEEAEE	AE1E
AE%E!AE1E(	AFE1
AE?	E;AFF
AFr   Nr  )r   r   r   r   r   r  r   r   r   r   r  r    sK    UU ,U '	U
 'U 
U Ur   r  c                f   [         [        R                  :X  a  [        5       nOK[         [        R                  :X  a  SSKJn  U" 5       nO%[         [        R                  :X  a  SSKJ	n  U" 5       n[        (       a=  SSKJn  SSKJn	  [        WU	5      (       d   S5       eU" U5      nUUR                  l        ["        (       aX  SSKJn
  SSKJn	  [        WU	5      (       d   S5       e['        5       n[        5       nX<l        U
" XU5      nUUR(                  l        UWl        UR+                  XX$5      $ )	Nr]   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)_ProgressiveFxCompilez=progressive is only valid with an out-of-process compile mode)fx_compile_moder   r   r  r   compile_fx_extr9  r   compile_fx_subprocr:  fx_compile_asynccompile_fx_asyncr;  r<  r   _compiler  fx_compile_progressiver=  r   _optimized_compiler  )r   rk  rq  r  ro  schemer9  r:  r;  r<  r=  progression_configsfast_schemes                r   r  r    s    -...$&	M33	38%'	M44	4<%'5: &"899 	
E	
9 !( 	+ ;: &"899 	
K	
9 78 *+*=' '{<OP 	!!5 "5F %%b/XXr   c                   / n[        U 5       H  u  p4[        U[        R                  5      (       d  M&  [	        UR
                  R                  5      (       d  ML  [        5          X1;   a  [        U5      (       a   SSS5        Mw  [        U5      (       d   SSS5        M   SSS5        UR                  U5        M     U$ ! , (       d  f       N%= f)z
This function runs at compile time, and generates a list of indices for which we
might need to do a copy to preserve alignment requirements.
N)rC  r   r   r   rI   r%  r  rs   rL   rJ   r   )inputsr  ids_to_checkr  rx  s        r   rM  rM  /  s     Lf%%..ell''((02 %*;E*B*B	 32
 /u55 32
 6 3 	A) &,  32s   $C C  
C	r   )r
  placeholdersmutated_input_idxsc                 ^ ^^^ SSK Jn	  [        R                  R                  (       aC  [
        R                  " U	UUUUUUU[        R                  R                  R                  5       S9	mO[        mS mSUUU U4S jjn
U
$ )Nr   )cudagraphify_impl)device_indexstack_tracesr  r  r
  rL  rM  r  c                   > Tc(  [         R                  " 5          T" TU T5      mS S S 5        T" U 5      $ ! , (       d  f       N= fr   )r   r  )
new_inputsr/  cudagraphify_fnmodelr  s    r   r  cudagraphify.<locals>.runt  s=    002-eZARS 3:&& 32s	   5
A)rS  r  r  r   )torch._inductor.cudagraph_treesrO  r   rQ  cudagraph_trees	functoolspartialr   r   rF  rG  )rU  r  rP  rQ  r  r  r
  rL  rM  new_cudagraphify_implr  r/  rT  s   ``         @@r   cudagraphifyr\  R  sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    [         R                  " U R                  5       U R                  5       U R                  U R
                  S9$ )z)
Copy and input while preserving strides
)r&  r%  )r   empty_stridedsizer   r&  r%  )r   s    r   static_inputr`  ~  s/     qvvx177188TTr   c                R    [        X5      n [        X5      nU R                  U5        g)z=Index into expanded dimensions of both dst and src then copy_N)r@   copy_)dstsrcexpanded_dimss      r   index_expanded_dims_and_copy_rf    s#     c
1C
c
1CIIcNr   c                  ^^	^
^^^ [        UT5      n[        [        UT5      5      m[        X5        [	        U[
        5      (       d   e[        U5       VVs/ s H  u  pEUT;  a  [        U5      O/ PM     snnm[        U5       VVs/ s HG  u  pE[	        U[        R                  5      (       d  UO UT;  a  [        U5      OUR                  5       PMI     snnm[        [        UT5      5       H@  u  nu  pV[	        U[        R                  5      (       d  M)  UT;  d  M1  [        TU   XV5        MB     [        R                  R                  5         [        R                  R!                  5       nUR#                  [        R                  R%                  5       5        [        R                  R'                  U5         U " [        T5      5        SSS5        UR                  5         [        R                  R%                  5       R#                  U5        [        R                  R                  5         [        R                  R)                  5       m
[        R                  R+                  T
USS9   U " [        T5      5      mSSS5        [	        T[
        [,        45      (       d  T4m[.        R0                  (       a  SU
UUUU4S jjnO8[3        [5        T5      5       Vs/ s H  oDT;  d  M
  UPM     snm	SU	U
UUU4S jjn[7        X[        5       5      $ s  snnf s  snnf ! , (       d  f       GNY= f! , (       d  f       N= fs  snf )zI
Assumes inputs[static_input_idxs[i]] are always the same memory address
Nthread_local)streamcapture_error_modec                  > [        T5      [        U 5      :X  d   e[        [        TU T5      5       H  u  nu  p#n[        U[        R
                  5      (       d  M*  [        U[        R
                  5      (       d   eUT;   a&  UR                  5       UR                  5       :X  d   eMw  [        X#U5        M     U R                  5         TR                  5         T	$ r   )
r   rC  zipr   r   r   data_ptrrf  r  replay)
rS  r   rc  rd  re  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r   r  cudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3..c "#u||44!#u||4444++<<>S\\^;;;
 2#MJ3 LLN!!r   c                   > T H<  nTU   nX   n[        U[        R                  5      (       d   e[        TU   X25        M>     U R	                  5         TR                  5         T$ r   )r   r   r   rf  r  rn  )	rS  r   re  rd  copy_indicesr   ro  rp  rq  s	       r   r  rr    sa    # 23 7 o!#u||4444-mC.@#U	 $
 LLN!!r   )rS  list[InputType]r   Callable[[list[InputType]], Any])rM  rV   ru   rp   r   r   rC  r?   r   r   r`  detachrl  rf  r  synchronizeStreamwait_streamcurrent_streamri  	CUDAGraphr   r?  r   size_assertsr   r   rn   )rU  rJ  r  check_input_idxsr   r   re  ri  r  rt  r   ro  rp  rq  s     `      @@@@@r   rO  rO    s    /v7HI)3#F,=>* 64fd####  ''FC !$+< <!"D'  '	 (FC a..  ++ a	 (	M $-S9K-L#Maa&&36G+G)-*<aO $N
 
JJZZ F
uzz0023			6	"d=!" 
#
	JJ++F3	JJ JJ  "E			%>		RtM23 
SntUm44(*	" 	", !]!34
4CCT8TC4
		" 		" (z|LL]	* 
#	" 
S	R8
s1   L	AL<L .L2	M+M 
L/2
M c                   [        U [        5      (       d   U 5       e[        U 5        [        R                  " U=(       d    0 5      nUR                  SS5      (       d  [        R                  (       d  SUS'   UR                  S[        R                  R                  5      nU(       a  UR                  S5      (       a   S5       eO0 UES[        U R                  5      0EnSS	KJn  U" U5      nUR                  S
S 5      nU R                   R                  SS 5      n["        R$                  R'                  U5      n[(        R*                  " S5         ["        R$                  R-                  U5         [/        SSSS9   [1        5          [3        U U[4        R6                  " UUS9US9n	[        U	[8        5      (       d   eU	R:                  sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)Nr  FTr  zaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.r]   )maybe_aoti_standalone_configr  dynamo_compile_idcompile_fx_aot)rf  reset_event_log_on_exit)r  )inner_compiler  )r   rQ   r,   r  deepcopyr   r   r  r  output_pathrm  r1   coder   r  r   r   r   r   rF  rw   set_aot_compilationcompile_contextr    r'   
compile_fxrY  rZ  r<   r  )
model_example_inputs_r  r  r  r  r  saved_compile_idsaved_compile_contextcompiled_artifactss
             r   r  r    s    fk**2F2* &f- &*]]>3GR%HN|U33v7H7H(,}% $$"F$7$7$C$CK ''// 	
R	
//

&	&++(>

 41.AN+//0H$O{{':DA!MM889IJ	d#%%&;<"&$(	

 	'#++'= *
 ,l;;;;!** 		
 	
 	=< 	$# 		
 	
 	
 	=<< 	$##sa    I 3H&?H
AG1		H	H&	I 1
G?;H	H&
HH&	I &
H4	0I  
Ic                  ^^^^ SSK JnJn	  [        X5      n
[	        U [        [        U
5      5      S9n [        R                  " U SS9nU(       a  [        XS5        U" U 5        U	" UU U5      u  nmT Vs/ s H  oU   PM	     nn[        U5      nUR                  R                  Gt nnUR                  S   n[        U5       VVs/ s H3  u  nn[        U[         R"                  R$                  5      (       d  M1  UPM5     snnUR&                  S'   / n[         R(                  R*                  R-                  5       nS/mSmUGbA  UR.                  c   eUR.                  n[1        S[3        U5      S-
  5      m[4        [6           " 5       nUR8                  nUc   eSn[3        U5      S:  a  / m[;        [3        U5      5       HN  nUT;  a   S UU'   US:  a  UU   UUS-
     :X  a  US-  nOUR=                  UU   5        TR?                  U5        MP     UR@                  c   e[;        [3        UR@                  5      5       H  nUU;  d  M  S UR@                  U'   M     URB                  (       a  URB                  RD                  n[F        RH                  RK                  USS5         U" UUUUUSUUS	9mS S S 5        [L        RN                  (       a  T$ SUUUU4S
 jjnSUl(        U$ s  snf s  snnf ! , (       d  f       ND= f)Nr   )%convert_conv_weights_to_channels_lastfreezers  Tr  r   r]   r  )r  r  r  r  r  r  c           
        > T Vs/ s H  nXT[        UT5         -
     PM     nnU R                  5         T" U5      $ s  snf r   )minr  )r   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r   wrapper%fw_compiler_freezing.<locals>.wrapper  sU     +
* +C>,BCCD* 	 
 	

!(++
s   >)r   zlist[object]r  zSequence[torch.Tensor]))torch._inductor.freezingr  r  r&   rr  rN  rO  ri   decide_layout_optr  r#   r   r  r   rC  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr  r   rV   r  params_unwrapped_to_flat_indexr   r)  r   params_flatr   r   r   r  r  rw   r8  _boxed_call) aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsr  r  r  forward_devicer  r  inputs_devicesr  	opt_modelindr  r(  model_outputs_nodemodel_outputsr   r  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  r  r  r  r  r  s                                @@@@r   fw_compiler_freezingr  8  s    W ((:ON6$~./
 001CRVWJ+F-.@A'-($I$ >SS=RcS1=RS !34I '__22Q&++A.M#M2;2QjEHHMM6R2;67 $&mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9 0 **666s?6678A5515++A. 9 && / ; ; P P			9&=t	D*/!'5!	
 
E 	!!, , GNQ T;L 
E	Ds   5K'0K,:K,&K22
L c                    [         R                  R                  (       a)  [         R                  (       a  [	        [        S5      5        [         R                  R                  b  [         R                  R                  O [        5       =(       a    [        R                  n U U (       + [         R                  R                  =(       a1    [        R                  (       + =(       a    [         R                  (       + SS.$ )Nz0cpp-wrapper does not support graph partition yetT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLttriton.cudagraphsztriton.store_cubin)
r   rQ  r  r  r7   r6   autotune_at_compile_timer\   rw   r8  )r  s    r   get_cpp_wrapper_configr    s    }}F$:$:+'B	
 ==11= 	.. \/a//	  ,D(@$@MM$$ +%%%+***"	 	r   c                R   [         R                  R                  5       (       d  [        R                  " 5       $ [        S [        U 5       5       5      n[        U5      S:X  a1  [         R                  R                  [        [        U5      5      5      $ [        R                  " 5       $ )zP
Returns a cuda device context manager if there is a single device in the graph
c              3  H   #    U  H  oR                   S :X  d  M  Uv   M     g7f)r  N)r  r  s     r   r   *get_cuda_device_context.<locals>.<genexpr>  s      806KK64I0s   "	"r]   )r   r  r  r  r  rV   rF   r   r%  rN  rO  )r   cuda_devicess     r   r  r    s     ::""$$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c           
        [        U 5      nU   [        X5      n[        U S[        [	        U5      5      S9n S S S 5        UR                  SS 5      n[        R                  c/  [        R                  " SSS9   [        U U4SUS.UD6sS S S 5        $ [        [        R                  [        5      (       d   e[        R                  " [        R                  R                  R                  SS9   [        R                  " U U4SUS.UD6sS S S 5        $ ! , (       d  f       N= f! , (       d  f       g = f! , (       d  f       g = f)NT)r\  rs  static_lifetime_input_indicesr   r  r  )compilerr  )r  r&   rr  rN  rO  r   r   custom_partitioner_fnr   r$   r   r   r9   	__class__r   )r   joint_inputsr   r)  r  r  s         r   partition_fnr    s>   
 +2.L	 ,L=*!%d>23
 
 7=jj'7! ##+&&1
 7 $.K	 
 
 &668KLLLL&&((22;;"&
 // $.K	
 	
 
; 
 
 

 
s#   )D7D#-D4
D #
D14
Ec                h    [        U 5      n[        R                  " UR                  6 n[	        U5      $ r   )rt   rD  arg_tree_leavesr   r   )rU  r  r  s      r   get_num_model_outputsr    s/    $U+**,>,C,CDM}r   c                    U R                   (       a7  [        R                  R                  (       d  [        R                  " SS05      $ [
        R                  " 5       $ )Nr  T)r   r   rQ  r  r  r  r  )r  s    r   cudagraph_annotation_contextr  	  s=      8 8||0$788!!##r   )frozenc                  L    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	rS
\S'   Srg	)CompilerConfigExtrai	  rC   r  r  r  r4   r  forward_is_partitionedNr  cudagraphs_bwd_overrider   )r   r   r   r   r   r  r   r   r   r   r  r  	  s$    M$$%%+/[/r   r  c                   [        U [        5      (       a  U R                  OS n[        [        R
                  R                  5      nS nUb  UR                  S5      =nb  UR                  bp  UR                  [        R
                  R                  :w  aH  [        UR                  5      nUR                  (       a  [        R                  " S5        O[        S5        UR                  (       a5  UR                  b(  UR                  (       d  UR                  n[        S5        [        [        5      n[!        S 5      n[        S5      n[#        UUUUUS9$ )Ncudagraph_annotationz9enabling cudagraphs due to override_cudagraphs annotationz:disabling cudagraphs due to override_cudagraphs annotationzGdisabling cudagraphs for backward due to override_cudagraphs annotationF)r  r  r  r  r  )r   rQ   r   rC   r   rQ  r  r   fwdr5   r  r7   r   bwdrN  _graph_counterr4   r  )r   gm_metar  r  
annotationr  r  r  s           r   create_compiler_config_extrar  	  s    $B44bgg$G 6==334J+/ 	";;'=>>ZK>>%*..FMM<T<T*T":>>2J~~##O 4P 
 ::>>&0nn#/Y N#H &d+N
 'u-% 75 r   c                  ^  U(       a  [        SS U 4S jS9  [        T 5      nUR                  S    Vs/ s HS  n[        U[        R
                  R                  R                  5      (       a  UR                  R                  S5      OSPMU     snUR                  S'   [        UT 5      n	[        T [        [        U	5      5      S	9m [        SS
 U 4S jS9  [        R                  R                  R!                  U[#        U5      5      n
[        T 5      n[$        R&                  (       a  [(        R*                  " UR                  6 n[#        U5      n[        R,                  R.                  R1                  5       nUb/  UR2                  (       a  U(       d  UR2                  R4                  nOSnX-::  d   eX-   nUU::  d   e[7        UU5       Vs/ s H3  n[        UU   [        R
                  R                  5      (       d  M1  UPM5     snUR                  S'   O/ UR                  S'   [9        T 5        [;        UR<                  5         U" T U[?        U
5      UR<                  UR@                  UURB                  S9nU(       dP  [        U[D        5      (       a;  URF                  (       a*  [#        URF                  5      S:  a  SURH                  l%        UsSSS5        $ s  snf s  snf ! , (       d  f       g= f)a  
Compile the forward graph of the given graph module.

Args:
    gm: The graph module to compile.
    example_inputs: The example inputs to use for compilation.
    num_orig_model_outputs: The number of model outputs from the original dynamo graph.
    num_example_inputs: The number of example inputs from the original dynamo graph.
    compiler_config_extra: Extra configuration for the compiler.
    inner_compile: The inner compile function to use.
    is_inference: Whether this is an inference graph.
r  c                     SSS.$ )Nbefore_joint_graphr  r  r   r   r   r   r  $compile_fx_forward.<locals>.<lambda>q	  s    ,$!r   c                 &   > T R                  SSSS9$ r  r  r  s   r   r  r  u	      r00"4  1  r   r$  r   r  Noutput_stack_tracesr  c                     SSS.$ )Nafter_joint_graphr  r  r   r   r   r   r  r  	  s    +$!r   c                 &   > T R                  SSSS9$ r  r  r  s   r   r  r  	  r  r   r   )r  r  r  r  r  r]   T)&rO   rt   r   r   r   r   r   r   r   r   r&   rr  rN  rO  r<  r   num_fw_fixed_argumentsr   r   keep_output_striderD  r  r   r   r   r   num_mutated_inp_runtime_indicesr   r   r  r  r   r  r  r=   partition_mapsr  r   )r   rk  num_orig_model_outputsr  compiler_config_extrar  r  r   argr  r   r  r  num_model_outputsr   original_output_start_indexorig_output_end_idxr   results   `                  r   compile_fx_forwardr  W	  s   , 		
 R {{1~.
 & c588==#5#566 ]+ &.
)* ,NB?*2DnAU<VW		
 OO!!88C/E %R  ..0B0G0GH.--..6687#6#6|##CC ( +,'%::: :R #&7777 8:MN?
N-,ehhmm< N?
 :; ?A :;
 /r2	%&;&F&F	G3E:,77*33%'<'K'K
 6?33%%F))*Q.AE!88>' 
H	GU.
z?
 
H	Gs   AK0K$K$	BK))
K7c                   SSK Jn  U   [        U 5      n[        R                  (       av  [
        R                  " UR                  6 n[        U5       VVs/ s H2  u  px[        U[        R                  R                  5      (       d  M0  UPM4     snnUR                  S'   O/ UR                  S'   [        U 5      n	UR                  n
UR                   b  [#        UR                   5      n
UR$                  R&                  (       a  [)        U 5      nO[+        [-        U	5      5      n[        R.                  (       a  [        R0                  " [3        5       5      O[4        R6                  " 5          [9        U
5         U" U UUU
SUR:                  UR<                  S9sSSS5        sSSS5        sSSS5        $ s  snnf ! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f)a  
Compile the backward graph of the given graph module.

Args:
    gm: The graph module to compile.
    example_inputs: The example inputs to use for compilation.
    compiler_config_extra: Extra configuration for the compiler.
    inner_compile: The inner compile function to use.
r   )compile_lockr   NT)r  r  r  r  r  )torch._dynamo.convert_framer  rt   r   bw_outputs_user_visiblerD  r  r   rC  r   r   r   r   r   rD   r  r  rC   r  r   rG   r   r   r  r  r  r  r  r  r  r  )r   rk  r  r  r  r  r  r   r  r   r  r  s               r   compile_fx_backwardr  	  s    9	(_))"224F4K4KLM (6C6FCa/ 6C##$>? CE##$>?r" +55
 88D"#8#P#PQJ
 !77==/G/K $U5\ 2 %% 356++-. )4 "3% .77+@+O+O 54	. .9 
C8 544	. . .9 
s[   AG./F3F3CG.+G7 F9	G 	G.3G.9
GG	G.
G"	G..
G<c                  ^  [        SS U 4S jS9  [        R                  S[        ST SSSS95        [	        T R
                  5      [        R                  R                  l        [        R                  R                  S	:X  ak  T R
                  R                   HQ  nUR                  (       d  M  UR                  [        R                  R                  R                  UR                  '   MS     [!        T U5      m [        SS
 U 4S jS9  T $ )Nr  c                     SSS.$ )Nbefore_pre_grad_graphr  r  r   r   r   r   r  %run_pre_grad_passes.<locals>.<lambda>"
  s    + 
r   c                 X   > T R                  SSSS9S[        T R                  5       3-   $ NFTr  z

 # graph id: r  idr   r  s   r   r  r  &
  7    600tD 1 
 b./
01r   r$  r  zBEFORE PRE GRADTr  r]   c                     SSS.$ )Nafter_pre_grad_graphr  r  r   r   r   r   r  r  A
  s    * 
r   c                 X   > T R                  SSSS9S[        T R                  5       3-   $ r  r  r  s   r   r  r  E
  r  r   )rO   pre_grad_graphs_logrL  r)   r  r   r   r<  r  r   r  r  r  r  #_inductor_pre_grad_node_stack_tracer   rc  )r  r  r   s   `  r   run_pre_grad_passesr  
  s    
 
1
 	
	 02&,,/?EOO,||--2LL&&D$$ %%II$))T ' (@F
1
 Mr   c                  ^ Tb	  SU4S jjnO[         nSSKJn  UR                  SS5      (       a  U $ U(       aH  [        R
                  " U5         [        U U[        R
                  " U5      " U5      TUUS9sSSS5        $ [        R                  " UUS9n[        S	 U 5       5      (       a2  [        R                  R                  R                  R                  5         [        R                  (       d  [        R                   (       a  SS
KJn	  [        R                  n
[        R                   n[        R
                  " ['        5       5         [(        R*                  " U5         [-        U [.        5      (       a  [1        X5      OUn[3        U5      nU	" X0 U5       u  nnnnn[5        UU[        R                  " UU
US9UUUS9sSSS5        sSSS5        sSSS5        $ [5        U UUUUUS9$ ! , (       d  f       GN|= f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        MV  ! , (       d  f       Nd= f)a  
Main entry point for compiling given FX graph.  Despite the fact that this
lives in :mod:`torch._inductor`, this function is responsible for calling
into AOT Autograd (and we will eventually get a callback to
``inner_compile`` to perform actual compilation.  In other words, this
function orchestrates end-to-end compilation for the inductor backend when
you use :func:`torch.compile`.

NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
mutate it!  Make a copy if you need to preserve the original GraphModule.
Nc                    > T $ r   r   )decompositionss   r   r  !compile_fx.<locals>.get_decomp_fnc
  s	    !!r   r   )CompilerBisectorr  pre_grad_graph)r  r  ignore_shape_envr  )r  c              3     #    U  H?  n[        U[        R                  5      =(       a    UR                  R                  S ;   v   MA     g7f))r  xpuN)r   r   r   r%  r  )r   r  s     r   r   compile_fx.<locals>.<genexpr>
  s7       A 	1ell#H(HH s   AA	)_fakify_script_objects)r  r  r  r  r  r  r  r  )r  zdict[Any, Callable[..., Any]])rc   !torch._inductor.compiler_bisectorr  disable_subsystemr   r  r  rY  rZ  r  r   r<  r=  AsyncCompilewakeupr  r  torch._export.non_strict_utilsr	  r  rw   set_real_inputsr   rQ    _extract_inputs_from_exported_gmr#   _maybe_wrap_and_compile_fx_main)r  r  r  r  r  r  r  r  r  r	  cpp_wrapper_configfx_wrapper_configinputs_r  patched_mod	fake_argsr(  s       `            r   r  r  M
  s    ( !	" 	" ,
 C))*6FGG\\.)$ll>:=I-!1$7 *) %%/M      	%%2299;V..I#//"-- LL/12o. fk22 1I$ 
 )1I'YG L6"+"3"3%$6#4#
 &6"/(; HG /. 32: +#/ y *)P HGG /.. 322sN   (G/H7%8H(H	H	H7/
G>
HH	H7
H*	&H77
Ic           
        U R                   R                   Vs/ s H/  o"R                  S:X  d  M  UR                  R	                  S5      PM1     nn[
        R                  (       d3  U Vs/ s H&  n[        U[        R                  5      (       a  UOS PM(     nn[        S U 5       5      (       a  [        [        5       X15       H  u  pVnUc  M  [        U[        R                  5      (       d  M,  [        U[        R                  5      (       d   eUR                  UR                  :w  d  Mi  [        SU SUR                   SUR                   S35      e   U$ U$ s  snf s  snf )Nr5  r   c              3  (   #    U  H  oS Lv   M
     g 7fr   r   )r   vs     r   r   3_extract_inputs_from_exported_gm.<locals>.<genexpr>
  s     
.+QD=+s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.)r   r  r   r   r   r   r  r   r   r   r  rl  r
   r%  
ValueError)r   r  r   fake_inputsinpr   fir  s           r   r  r  
  s8    *,)777m;S		e   
 GR
FQs:c5<<00Cd:k 	 
 
.+
...eg{DJCQ~*R">">!!U\\222299($\]`\aac99+T!(( 4cc 	 E 3
s   EE*-E!r  c          	     h   [         R                  " [        UUUUS9n[        U 5      (       d  [	        XU5      $ [        U [        5      (       a5  [        U R                  R                  [        5      (       a  [        XU5      $ [        S U 5       5      (       a  [        XU5      $ [        U UUUUUS9$ )z
Part of compile_fx, called after patching configs.

Ultimately we want to call _compile_fx_main, where the actual work happens.
But under various conditions, various forms of wrapping might be needed
around _compile_fx_main.
r
  c              3  X   #    U  H   n[        U[        [        [        45      v   M"     g 7fr   r   r   r?  r  r   s     r   r   2_maybe_wrap_and_compile_fx_main.<locals>.<genexpr>
  s!     
G!:a$t,--   (*r  )rY  rZ  r  graph_returns_tuplemake_graph_return_tupler   rQ   r   _codegenr[   handle_dynamo_export_graphr  r%   _compile_fx_main)r  r  r  r  r  r  
compile_gms          r   r  r  
  s    $ ""'#)#/J v&&&v
KK&+&&:~, , *&:NN

G
GGG $FZHH #/ r   c               @  ^ ^^^ [        [        R                  5         [        5          [        R
                  R                  R                  [        R                  R                  S:H  5         [        R                  R                  R                  5          [        R                  (       a   e[        U5      m[!        T 5      mU" 5       n["        R$                  " TUS9m        SUUU U4S jjn["        R$                  " USS9n['        [(        U5      n[        R*                  (       aW  [        R,                  " 5       (       d=  ["        R$                  " [.        T TTTR0                  TR2                  TR4                  S9n	O%["        R$                  " USS9n	['        [(        U	5      n	[7        SS	9      SUU4S
 jj5       n
['        [(        U
5      n
[9        U5      =(       d    [        R:                  R=                  SS9n[        R>                  R@                  RC                  5       =(       d    [        R>                  RA                  U5      n[D        RF                  (       Ga  [        RH                  (       Gd  SSK%J&n  U" 5         [O        T [P        5      (       a  [S        T U5      m [T        RV                  " S[        RX                  S9   [[        T USUS9u  p[O        U[P        5      (       d   eSSK.J/n  U" U5      nUR`                  Rb                   GH  nURd                  S:X  d  M  SURf                  ;  d  M(  [i        URj                  5      " U5      n[O        U[        Rl                  5      (       a$  Uc   eURo                  USS9URf                  S'   M  [O        U[        Rp                  5      (       d  [s        [u        U5      5      (       a9  [        Rv                  Rx                  R{                  UU5      URf                  S'   M  [O        U[|        5      (       d  GM  UURf                  S'   GM!     SSS5        [        T WW5      nST Rf                  ;   a  T Rf                  S   URf                  S'   ST Rf                  ;   a  T Rf                  S   URf                  S'   [        R                  R                  5       nU(       a  [        R                  R                  O[        R                  n[D        R                  " U5         [        R                  " 5          U" 5          U	" UU5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ [D        R                  " U5         [        R>                  R                  U5         [        R                  " 5          [T        RV                  " S[        RX                  S9    [        R                  " UU
U	U[        STU[R        US9
" T U5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ ! , (       d  f       GN&= f! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        GM.  ! , (       d  f       GN== f! [         a  nUR                  5       SeSnAff = f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f)a)  
Main part of compile_fx, called after wrapping is done.

Roughly speaking, here the steps will be:
(1) apply pre-grad passes
(2) create `fw_compiler` and `bw_compiler` functions out of `inner_compile`
(3) call aot_autograd, which:
- (3a) creates a joint graph with `decompositions`,
- (3b) partitions it with `partition_fn` into fw and bw graphs (applying joint-graph passes),
- (3c) calls `fw_compiler` and `bw_compiler` on those graphs (applying post-grad passes)
- (3d) finally, assembles the fw and bw compiled functions back together and returns.
r]   )r  c                   > [         R                  " S5         [        T[        5      (       a  [	        T5      nO[	        U 5      n[        U UUTTTUS9sS S S 5        $ ! , (       d  f       g = f)Nz$compile_fx.<locals>.fw_compiler_base)r  r  r  r  r  )r   r$   r   rQ   r  r  )r   rk  r  r  r  r  r  r  s       r   fw_compiler_base*_compile_fx_main.<locals>.fw_compiler_base2  sa    
 **+QRfk22-B6-J*-B2-F*)"+A'9*?"/!- SRRs   ;A
A+Fr  )r  r  r  r  r  r  Tbackward)r  c                ~   > [         R                  " S5         [        U UTTS9sS S S 5        $ ! , (       d  f       g = f)Nzcompile_fx.<locals>.bw_compiler)r  r  )r   r$   r  )r   rk  r  r  s     r   bw_compiler%_compile_fx_main.<locals>.bw_compiler[  s5    
 ))*KL*"*?"/	 MLLs   .
<r  )is_valid_aoti_model_nameunlift_effect_tokensselective_decompose)trace_jointr  r   )_detect_fake_mode_from_gmr  r   N)static_shapes dynamo_flat_name_to_original_fqnr  )
fw_compilerr2  inference_compilerr  r  keep_inference_input_mutationsr  r  rh   r  )r   rQ   rk  r  r  r   r  rA   )r   rQ   rk  r  r  rA   )NrZ   r  r  r   r   r   r  preserve_node_metar   r  r  r<  rL  reset_provenance_globals_raise_error_for_testingr   r  rY  rZ  r0   rA   freezingis_grad_enabledr  r  r  r  rP   r#   r  r  r   r   r   rw   r8  enable_autograd_for_aotr   r4  r   rQ   r  functorch_configr  r7  r-   torch._export.utilsr9  r   r  r   r   r   r   r   from_tensorScriptObjectrN   r  _libraryfake_class_registrymaybe_to_fake_objrM   rV  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingdynamo_commonr+   r  rX   remove_dynamo_frames)r  r  r  r  r  r  r  r.  r<  r=  r2  r  r  r4  r   rG  r9  r   r   rU  disable_ampr   r  r  r  s   ` `                    @@r   r*  r*  	  s   , 	}BBC "--LL22a7	
 	668
 2222 1 <V D&!))-}U		/	 	 		 	* .UC 	 6j+N??5#8#8#:#:5>5F5F$##5+0;;.774CC6 "+!2!23CRV!W!@." 
'*	=		-@		 
>	 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 V%C%C%C7$&
 &+..,V_E!''%)$*$>$> '8# %#1	'# ""k2222I5b9	 HHNNDww*,dii1G!+DKK!8!<%fell;;#,#88#8/8/D/D &d 0E 0DIIe, (0B0BCC~ LH H !& B B T T$-v!" !IIe,
 (0@AA/5DIIe,# +)N (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  +->-G-G-I79)+G LU9-I-I++i 	98	
 	
 	#" 	DCz OOI&MM!!/2&&(""%)$*$>$>
9$11 + +'9#1!-37*?%5$7(; /+  )( 32 '&q 	98	
 	
 	#" 	DC~ t LU9-I-I-I++++2 $ 9 ,,.D89%   )(( 322 '&&q 	988	
 	
 	
 	#"" 	DCCs  ^A]5.)]H8]	AY/YC"Y(Y<C]	Z#$Z,	Y-5	Z>	Z#	]		]	]5"	^5]	 \'+\$[3%['+Z5	[3	\$	\'-	]	6	]?	]5	^
Y*%]	-
Y;7Z>	Z#
ZZ#]	#
Z2-]	5
[?[[[
[&"[3*	\3
\=\	\'
\\'	]	'
\51]	8	]
]]	]5
])%]5,	^5
^	?^
^c                   [        U [        5      (       d  g[        U 5      R                  u  n[        U[        [
        45      (       a  g[        U[        R                  R                  R                  5      (       a~  [        UR                  S5      (       ac  [        UR                  R                  R                  5      S:  a6  [        S UR                  R                  R                   5       5      (       a  gg)z"True if a FX graph returns a tupleT_schemar]   c              3  R   #    U  H  n[        UR                  5      S :H  v   M     g7f)r   N)r  r  )r   rets     r   r   &graph_returns_tuple.<locals>.<genexpr>  s     O5NcCHH)5Ns   %'F)r   rQ   rt   r   r   r?  r   r   r   r   r#  r   r   rU  returnsrT  )r   rvs     r   r&  r&    s    b+&&O  ER"tUm$$2uxx}}))**BIIy))		!!))*Q.ORYY5F5F5N5NOOO r   c                  ^^ [        U 5      nUR                  u  n[        R                  " U5      u  nmU R                  R                  U5         U R                  R                  U5        SSS5        U R                  R                  U5        [        U 5      (       d   eU" X5      m[        R                  " T5      SUU4S jj5       nU$ ! , (       d  f       Ng= f)zu
Mutate gm so it returns a tuple.  This is only needed for graphs
not created by torchdynamo that return non-tuples.
Nc                 >   > [         R                  " T" U 0 UD6T5      $ r   )rD  tree_unflatten)r   r   r/  specs     r   r  (make_graph_return_tuple.<locals>.wrapper  s     $$[$%A&%A4HHr   )r   r   r   r   r  r   )rt   r   rD  tree_flattenr   inserting_beforer   r  r&  rY  wraps)r   rJ  r+  r   rZ  r  r/  r^  s         @@r   r'  r'    s     r?DIIER""2&HB		"	"4	(
 
)HHr""""R(K__[!I "I N 
)	(s   C
Cc                4  ^^ U R                   R                  m[        R                  R                   R	                  5       U R                   l        U R                  5         U" U TR                  " U6 5      m[        R                  " T5      SUU4S jj5       nU$ )z
`torch._dynamo.export` embeds pytrees in the FX graph codegen object,
convert that to a normal FX graph so inductor can compile it.
c                 F   > TR                  T" TR                  " U 6 6 5      $ r   )process_outputsprocess_inputs)r   r  r/  s    r   r  +handle_dynamo_export_graph.<locals>.wrapper  s$    &&{G4J4JD4Q'RSSr   )r   r   r  r   )	r   r(  r   r   CodeGenr  rf  rY  rb  )r   rJ  r+  r  r  r/  s       @@r   r)  r)    su     hhG..0BHHLLNR!7!7!@AK__[!T "T Nr   c                   SS jn[         R                  " U R                  R                  5       U R                  5       H  n[        U[        5      (       d  M  [        U5      nU(       a2  [        U5      (       a"  UR                  5       [        R                  :w  a  M`  [        U5      nUR                  SS9(       a    g U" UR                  5       5        M     g )Nc                    SSK Jn  U c   e[        U R                  5      nUR	                  U 5      n[
        R                  " UR                   S35        U" S5      e)Nr   )rY   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrY   r   r  get_device_propertiesr  r  r   )r%  rY   device_interfacedevice_propss       r   warn_and_skip1_check_triton_bf16_support.<locals>.warn_and_skip  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)r%  torch.device | Noner  r   )	itertoolschaingraph_inputsrF  r  r   rk   rj   rI   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   ro  r   r  rm  s        r   r  r    s    
1  2 2 9 9 ;U=P=PQ$''%d++&&~~5>>1 4K@--%-Hdoo'( Rr   )optionsc                  SSK Jn  U" U 5      (       d   S5       eSnSn[        U R                  R                  [
        R                  R                  R                  5      (       a  U R                  R                  n[
        R                  R                  R                  5       U R                  l        U R                  5         UR                  R                  b  UR                  R                  nUR                  R                  b  UR                  R                  nO:[        U S5      (       a  U R                  n[        U S5      (       a  U R                  nUb  [         R"                  " U5      OSnUb  [         R"                  " U5      OSn	[         R$                  " X=(       d    0 45      u  p['        S U
 5       5      (       a  S	S
KJnJn  U" UR.                  S5      eU
 Vs/ s H,  n[        US   [
        R0                  5      (       a  US   OSPM.     nnUb  X:w  a  [3        SU SU 35      eUc  UU	S.O0 UEUU	S.EnX4$ s  snf )z
Flatten the inputs to the graph module and return the flat inputs and options.
Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
r]   )r&  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  \   #    U  H"  n[        US    [        R                  5      v   M$     g7f)r]   N)r   r   rH  r   s     r   r   '_aoti_flatten_inputs.<locals>.<genexpr>h  s&     
M9LA:adE..//9Ls   *,r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)r  r&  r   r   r(  r   r   r[   rh  r  pytree_infoin_specout_specr#  r|  r}  rD  treespec_dumpstree_flatten_with_pathr  rk  r  r  INVALID_INPUTr   r  )r   r   r   rz  r&  r  r  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr  r  r   flat_example_inputss                   r   _aoti_flatten_inputsr  8  s5    0r"" 	" GH"((##UXX^^%B%BCC((##!HHNN224
&&2))11G''3**33H 2z""kkG2{##||H;B;N..w7TV+3+?h'R  *0)F)F	|*& 
M9L
MMM>''8
 	
 CVBUQ
1Q4..!D8BU   }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   &3Ic                   [         R                  (       d  [         R                  (       a  [        S5      eUb  UO	[	        5       n[        U [        5      (       a  [        U 5      (       d  [        S5      e[        S U 5       5      (       a  [        S5      e[        U 5      n[        U5      =(       d    [        R                  R                  SS9n[        R                  R                   R#                  5       =(       d    [        R                  R!                  U5      n[$        R&                  " S[         R(                  S9   [+        [,        R.                  5         [1        5          [        R2                  R4                  R7                  [         R8                  R:                  S:H  5         [        R<                  R>                  RA                  5          [B        RD                  " U5         [        R                  RG                  U5         [H        RJ                  " 5          [L        RN                  " U UUUUSS	9sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)
NzBautograd_cache_key is not supported with cpp_wrapper or fx_wrapperzDautograd_cache_key does not support graphs that don't return a tuplec              3  X   #    U  H   n[        U[        [        [        45      v   M"     g 7fr   r#  r   s     r   r   %autograd_cache_key.<locals>.<genexpr>  s!     
F~!:a$t,--~r%  z;autograd_cache_key does not support nested container inputsTr  r5  r]   )r  r  r  r>  )(r   r  r  RuntimeErrorrc   r   rQ   r&  NotImplementedErrorr  r  r#   r   r  r  r   r   r   rE  r  r7  rZ   r  r  r   r   r  r?  r  r  r<  rL  r@  rw   r  rP  r   rO  r+   autograd_cache_key)r   rk  r  r  r  r  r  s          r   r  r    s`    V..P
 	

 )4:M:O  %%%.A%.H.H!R
 	
 
F~
FFF!I
 	
 9? !0 E4E4E4T4T" 5U 5I 	$$,,. 	3==''	2  	!%6;U;U	
 	}BBC "--LL22a7	
 	668		"o.""$..-)"7+/
 	%$ 	/. 	#" 	98	
 	
 	#" 	DC	
 	
 	%$$ 	/.. 	#"" 	988	
 	
 	
 	#"" 	DCC	
 	
 	
s   L;L!'AL,)K-	K, J9J"J;	J	J9	K	K-		L(	L!1	L;
JJ	J9
J-)J90	K9
KK
	K-	
K!K-	$	L-
K;7L>	L!
LL!	L;!
L/	+L;;
M	)r   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r  rw  )r  r   )r  zlist[dict[str, Any]])r   r  r  	list[int])r   rQ   r  rw  )r  zCallable[..., None]r  )r*  rQ   r   rQ   r  rw  )r*  rQ   r   rQ   rG  r   r  rQ   )F)r   rQ   r\  r   r  zGenerator[str, None, None])r   rQ   rk  r  r  rQ   )FN)r   rQ   r\  r   rs  rr  r  rQ   )r   rQ   r  r   r  rw  )TNN)
r   rQ   r  r   r  zlist[str] | Noner  z&Callable[[torch.fx.Node], bool] | Noner  z"tuple[GraphModule, dict[str, int]])r   rQ   r  r   )rk  r  r  "AbstractContextManager[None, None])r  r   r  r   r  r  )r   rQ   rk  r  r  r   r  z torch._subclasses.FakeTensorModer   )r  zstr | dict[str, Any] | Noner  zdict[str, Any])r  zGenerator[None, None, None]r  )
r   rQ   rk  r  r  r  ro  r  r  rA   )r   rQ   rk  r  rq  r  r  r  ro  r  r  rA   )rJ  r  r  r  r  r  )r   )rU  Callable[..., Any]r  r  rP  r  rQ  zlist[str | None]r  r   r  r   r
  ztuple[torch.Tensor, ...]rL  zSequence[PlaceholderInfo]rM  ztuple[int, ...]r  r  )r   torch.Tensorr  r  )rc  r  rd  r  re  r  r  rw  )rU  r  rJ  zlist[torch.Tensor]r  r  r  rv  )
r  rQ   r  ru  r  r  r  dict[str, Any] | Noner  z'list[str | Weights] | str | GraphModule)r  rQ   r  r  r  rQ   r  r  r  r  r  rC   r  r  r  r4   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r   rQ   r  zSequence[object]r   r  r  ztuple[GraphModule, GraphModule])rU  rQ   r  r  )r  rC   r  z'contextlib.AbstractContextManager[None])r   zGraphModule | GmWrapperr  r  )r   rQ   rk  r  r  r  r  r  r  r  r  Callable[..., OutputCode]r  r   r  rA   )
r   rQ   rk  r  r  r  r  r  r  rA   )r  rQ   r  r  r  rQ   )r  rQ   r  r  r  r  r  r  r  z+dict[OpOverload, Callable[..., Any]] | Noner  r   r  r  r  CompileFxOutput)r   rQ   r  r  r  r  )r  rQ   r  r  r  r  r  r   r  r  r  r  r  r  )r   rQ   rJ  r  r+  r  r  r  )r   ri   r  rw  )
r   r  r   zlist[Any] | tuple[Any, ...]r   r  rz  r  r  z tuple[list[Any], dict[str, Any]])r  r   (%  
__future__r   r  r  enumrY  r  rs  r  r   r   r  rS  r  abcr   r   collectionsr   r   dataclassesr   inspectr	   r
   operatorr   typingr   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreerD  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   r   torch._dynamo.backendsr   rQ  torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr   rC  r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   torch._functorchr+   rE  7torch._functorch._aot_autograd.subclass_parametrizationr,   torch._functorch.aot_autogradr-   r.   r/   r0   torch._inductor.codecacher1   r2   r3   r!  r4   r5   r6   r7   r8   !torch._inductor.custom_graph_passr9   torch._inductor.debugr:   r;   torch._inductor.output_coder<   r=   r>   r?   r@   rA   'torch._inductor.runtime.cache_dir_utilsrB   torch._inductor.utilsrC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   "torch._library.fake_class_registryrM   torch._library.opaque_objectrN   torch._loggingrO   torch._utils_internalrP   rQ   %torch.fx.experimental.symbolic_shapesrR   rS    torch.fx.passes.fake_tensor_proprT   torch.monitorrU   torch.utils._ordered_setrV   _dynamo.excrX   rY   fx._lazy_graph_modulerZ   fx.graphr[   utils._tritonr\   r~  r^   r_   codegen.commonr`   ra   rL  rb   decompositionrc   excrd   fx_passes.joint_graphre   fx_passes.post_gradrf   rg   fx_passes.pre_gradrh   r   ri   irrj   rk   output_coderl   triton_bundlerrm   rn   ro   rp   rq   rr   rs   rt   ru   rv   virtualizedrw   collections.abcrx   ry   rz   r{   
torch._opsr|   )torch.export.pt2_archive._package_weightsr}   r~   r   r   r  r   r   torch._inductor.fb.utils&torch._functorch._aot_autograd.schemasr   r   r   r   r  r   r  r  Enumr   r   r   r   _fx_compile_configr   r>  r   rA  r   rD  r   r   r   _logginggetArtifactLoggerr  r  r  rK  r  r   r   r   r   	lru_cacher   cacher	  r0  rV  ra  rc  rr  r|  r  r  r  r  r  r  contextmanagerr  r  r  r  r  r  r  r  r  rM  r\  r`  rf  rO  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r*  r&  r'  r)  r  r  r  r   r   r   <module>r     s
   "     	    	 
   # # - !     . . U U  $  $ $ A  ;  ; C =    F  O N  B  >   @ 7 + ?   W ; & / 5 : % & 3 3 U  .  5 B /   ' / )
 
 
  ==:%A$ t_T]((**% L  	$v,%,,!7783>cJWT 
DII    CD ./ $))%// +;; !00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A,'=  T/ / 	
 	
H/VM	M%M8FMMb 38!!+/!!0NN'N N0 "'(,''' &' 	'T+" ".2CG	E(E(E( ,E( A	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ 37(/((  @y @  '+-
-
'-
 $-
 '	-

 -
` 23 '+ll'l $l -	l
 l 4l^B B# #6W) W@ '+<Y<Y'<Y
 #<Y $<Y -<Y <Y~  $   J (*) +-.0*,))$) 	)
 #) ) ) () ,) () )XU		  
	 (*_M_M_M %_M &	_MJ )9,0	@+@+$@+ &@+ *	@+
 -@+F qh#h+h h 	h
 &h h h %h 6hV4$--"- - %	-`$$,$ $0 0 0;;;H 0@CC'C  C 	C
 /C -C C CT 0@	;;'; /; -	;
 ;|//*=//j 0@,0BF"&*hh(h -h *	h
 @h h $h hV&9J CV&*//(/ -/ 	/ @/ $/ /p CV&*K9K9(K9 -K9 	K9 @K9 $K9 K9\$ # 	4 # 	,)D %)S(
 &*S(S(
%S( "S(
 #S( &S(t 	C
 C
r   