
    3j                   p   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKJr  S SKJr  S SKJrJrJr  S SKrS SKJr  S SKrS SKrS SKrS SKJrJr  S SKJr  S S	KJrJr  S S
KJ r   S SK!J"r"J#r#J$r$  S SK%J&r&  S SKJ'r'J(r(  S SK)J*r*J+r+  S SK,J-r-  S SK.J/r/  S SK0J1r1  S SK2J3r3J4r4  S SK5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=  S SK>J?r?  S SK@JArA  S SKBJCrC  S SKDJErE  S SKFJGrG  SSKHJIrIJJrJJKrK  SSKLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrT  SSKUJVrVJWrWJXrXJYrY  SSKZJ[r[  SSKJJ\r\J]r]J^r^J_r_J`r`JaraJbrbJcrcJdrdJereJfrfJgrgJhrh  SSKiJjrjJkrkJlrlJmrmJnrnJoroJprpJqrqJrrrJsrsJtrtJuruJvrv  SS KwJxrx  SS!KyJzrz  SS"K{J|r|  SS#K}J~r~JrJrJrJrJrJrJrJrJrJrJrJr  SS$KJrJr  \(       a;  S S%KJrJrJrJr  S S&KJr  S S'KJr  S S(KJr  S S)KJr  SS*KJr  SS+KJr  SS,KJr  \\O-  rS S-KJr  \GRF                  " \5      r\GRL                  GRO                  \S.5      r\GRR                  GRT                  r\GRV                  " 5       r\IGRZ                  " 5       (       a  S S/KJr  S S0KJr  O
S>S1 jrS>S2 jrS?S3 jrS@S4 jr      SAS5 jrSBS6 jr    SCS7 jr      SDS8 jrSES9 jr " S: S;\GRr                  GRt                  5      r " S< S=\5      rg)F    )annotationsN)defaultdict)contextmanager)AnyNoReturnTYPE_CHECKING)Expr)deviceTensor)get_decompositions)defakedynamo_timed)FakeScriptObject)is_opaque_reference_typeis_opaque_typeis_opaque_value_type)get_layout_constraint_tag)
LazyStringtrace_structured)compute_required_storage_lengthmake_channels_last_strides_for)
FakeTensor)full_aoti_runtime_assert)BackwardState)magic_methodsmethod_to_operator)_get_placeholder_exprfree_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Node)_is_view_op)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesFileBackedGraphModuleget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registrationWorkspaceArg)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)count_flops_fx)assign_origin_nodeConstantDonatedBufferFixedLayoutget_device_typeGraphPartitionSignatureInputBuffer	Pointwise	ReductionShapeAsConstantBuffer
StorageBox	TensorBoxTorchBindObject)constrain_to_fake_tensorsconstrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguoustag_to_layout_constraintunsupported_output_tensoruser_lowerings)autotune_cache)AutotuneCacheBundler)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeGraphPartitionMapis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_alignedshould_fallback_by_defaultSUPPORTED_MKLDNN_DEVICESValueWithLineMap)NullHandlerV)CallableIterableIteratorSequence)
ModuleType)_EffectType)GraphModule)Graph)PythonWrapperCodegen)Dep)BaseSchedulerNode)output_code_log
perf_hints) save_triton_kernel_perf_artifact)log_module_codec                     g N argskwargss     O/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/graph.pyru   ru              c                     g rw   rx   ry   s     r|   rt   rt      r}   r~   c                   [        U [        R                  [        R                  [        R                  R
                  R                  45      (       d   S5       e[        U [        R                  R
                  R                  5      (       a  [        R                  $ [        U [        R                  5      (       a  [        U 5      $ U R                  (       a  [        R                  $ U R                  (       a  [        R                  $ g )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr	   corenumbersIntegertorchint64r\   
is_integeris_floatfloat32)constant_buffers    r|   may_get_constant_buffer_dtyper      s    %,,

EJJ4F4F4N4NO   	r 
 /5::#5#5#=#=>>{{/5::..#O44!!{{		!	!}}r~   c                6    [        S [         5       5      nX;   $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7frw   )r   ).0ms     r|   	<genexpr>"is_magic_method.<locals>.<genexpr>   s     H-Q-a00-s   )r(   r   )op	magic_opss     r|   is_magic_methodr      s    H-HHI?r~   c           	         UR                  S5      nU n[        U5       H@  u  pE[        X55      (       d   [        SSR	                  US U 5       35      e[        X55      nMB     U$ )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)objtargettarget_atomsattr_itriatoms         r|   getattr_recursiver      sn     <<$LH\*x&&5chh|BQ?O6P5QR  8* + Or~   c                n   0 nU R                  SS9S   nSUR                  ;  a  U$ [        UR                  S   [        R
                  R                  5      (       d  UR                  S   nOUR                  n[        U5       H-  u  pEXBR                  S   ;   d  M  UR                  S   U   X'   M/     U$ )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   rz   r   fxr%   r   )gretoutput_nodeoutput_node_argsidxnodes         r|   get_user_visible_output_stridesr      s    ')C,,(,+A.K!)9)99
k&&q)588==99&++A.&++/0	""#=>>#(()BCCHCI 1 Jr~   c                   0 U En/ UR                  5       Qn[        / UQ5      nU(       a  UR                  5       n[        UR                  5      (       a  UR
                  (       a~  [        UR
                  S   [        R                  R                  5      (       aH  UR
                  S   nXS;  a4  UR                  US5        UR                  U5        UR                  U5        U(       a  M  U$ )z[
Extend user_visible_output_strides to include view ops that lead to user-visible outputs.
r   N)keysr(   popr&   r   rz   r   r   r   r%   
setdefaultaddappend)user_visible_outputsresultqueuevisitedcurrentbases         r|   "extend_user_visible_output_stridesr      s     ":$8!9FfkkmE5"G
))+''7<<?EHHMM::<<?D"!!$-D!T" % Mr~   c                z   [         R                  (       d  g[        U5      n[        [        R
                  [        R                  [        R                  /5      n[        [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                   [        R"                  [        R$                  [        R&                  /5      n    SS jn[)        U R*                  5       GH  n[-        UR.                  [0        R2                  R4                  R6                  5      (       a  SUR8                  S'   MR  [-        UR.                  [0        R:                  R<                  5      (       aL  [?        UR.                  5      [0        R@                  RB                  RD                  :X  a  SUR8                  S'   M  U" U5      nU(       d  M  Xs;   a  SUR8                  S'   UR8                  RG                  SS5      (       a:  URH                   H*  nU" U5      n	U	(       d  M  X;  d  M  SUR8                  S'   M,     [         RJ                  (       a  GMi  Xb;   d  GMq  SUR8                  S'   GM     g)ar  
Nodes like convolution/convolution_backward want its input to be dense.
If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

The pass finds nodes that dislike padding. These are nodes that can be reached
from a convolution/convolution_backward in the backward direction without
going thru a reduction.
Nc                    U R                   S:X  ad  [        U R                  [        R                  R
                  5      (       a1  [        U R                  S5      (       a  U R                  R                  $ S $ )Ncall_function_overloadpacket)r   r   r   r   _ops
OpOverloadr   r   )r   s    r|   _get_overload_packet8mark_nodes_dislike_padding.<locals>._get_overload_packet   s[    
 ww/)4;;

(=(=>>%677	 KK''	
 	
r~   Tdislike_paddingF)r   torch.fx.Nodereturnz"torch._ops.OpOverloadPacket | None)&r+   comprehensive_paddingr   r(   atenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducereversednodesr   r   r   _higher_order_opstriton_kernel_wrapTritonKernelWrapperMutationr   r   r   r   _CTagneeds_exact_stridesgetall_input_nodespad_outputs)
r   user_visible_output_stridesextended_user_visible_nodesops_dislike_paddingops_like_paddingr   curr   priorprior_ops
             r|   mark_nodes_dislike_paddingr      s    ''"D## %%%OO	
 "MMHHIIIIHHIIIIHHHHKKKK	
"



	+

  JJ##66RR
 
 +/CHH&' szz5::#8#899)#**5xx||//0 +/CHH&'!#&$*.CHH&'88<<)511,,/6348EJJ01 - !!!c&H*.CHH&'A !r~   c                   [        [        R                  SS 5      Gbb  [        [        R                  R                  SS 5      Gb;  [	        U R
                  [        R                  5      (       Ga  [        U R
                  R                  5      S:  a  [        U R
                  R                  S   S5      (       a  U R
                  R                  S   R                   H  nUR                  S   [        R                  R                  R                  R                  [        R                  R                  R                  R                  [        R                  R                  R                   R                  4;   d  M    g   g)Nmkldnn_convolution_pointwiser   targetsTF)r   r   opsr   r   r   	functoolspartiallenrz   r   r   fnsr   defaultbinary_convolution_pointwise_)r   r   s     r|   is_mkldnn_convr   O  s    			8T*6EII$$&>EQt{{I$5$566  !A%DKK$$Q'33kk&&q)11Fzz!}		  77??		  77>>		  88??! 
  2 r~   c                    ^  \ rS rSr% S\S'                     SH                                       SIU 4S jjjrSJS jr    SKS jr    SLS	 jr    SMS
 jr	    SNS jr
      SOS jrSPSQS jjrSRS jr\R                  SSS j5       rSTS jr\SUS j5       rSVS jr        SWS jrSXS jrSYS jrSZS jr\S[S j5       r    S\S jrS]S jr    S^S jrS_S jrS`S jrSaU 4S jjr SbS jr!SS.ScS  jjr"SdS! jr#SeS" jr$SYS# jr%SfS$ jr&SgS% jr'ShSiS& jjr(SjS' jr)        SkU 4S( jjr*\+RX                  SlU 4S) jj5       r-\SmS* j5       r.        SnS+ jr/SoS, jr0SoS- jr1\+RX                          SpU 4S. jj5       r2SJS/ jr3\SqS0 j5       r4\SrS1 j5       r5            SsS2 jr6\StS3 j5       r7SuS4 jr8SvU 4S5 jjr9      SwS6 jr:SJS7 jr;    Sx         SyS8 jjr<    SzS9 jr=  S{S: jr>SJS; jr?S{S< jr@S|S= jrA  S}S> jrBSrCS?\S@'   S~SA jrDS~SB jrE    SSC jrFSSD jrGSSE jrHSSF jrISGrJU =rK$ )GraphLoweringid  list[ir.IRNode]graph_outputsNFc                  > [         TU ]  U5        UU l        X l        Ub  UOU R	                  XS9U l        SU l        Xl        Xl        Xl	        Xl
        Xl        Xl        UU l        SU l        SU l        Uc  [!        5       nSU l        OSU l        X0l        UR&                  R)                  5       U l        [,        [.        R0                     " 5       U l        [5        U5      U l        / U l        0 U l        0 U l        S U l        [-        5       U l         U(       a  URB                  O	[-        5       U l!        U(       a  URD                  O	[-        5       U l"        SU l#        [I        [,        5      U l%        [I        [,        5      U l&        0 U l'        / U l(        / U l)        U(       a  UO0 U l*        U(       a  [-        URW                  5       5      O	[-        5       U l,        U(       a  URZ                  O0 U l-        U(       a  UR\                  O0 U l.        UR^                  Ra                  S[-        5       5      U l1        U(       a  URd                  O0 U l2        0 U l3        0 U l4        0 U l5        0 U l6        [-        5       U l7        [-        5       U l8        [-        5       U l9        [-        5       U l:        0 U l;        [-        5       U l<        [-        5       U l=        S U l>        S U l?        SSK@JAn  [        R                  " 5       (       a	  U(       a  UOUU lD        S U lE        0 U lF        [-        5       U lG        / U lH        0 U lI        [I        [        5      U lK        0 U lL        [        R                  " 5       U lN        UU lO        XPlP        UU lQ        XPlR        0 U lS        X`lT        X@lU        [        [        5      U lX        S U lY        S U lZ        S U l[        S U l\        S U l]        U R
                  (       a  U R                  5       O	[-        5       U l_        [-        S/5      U l`        [        UR                  5      U lc        [        UR                  U R                  5        S	U le        S	U lf        / U lg        S U lh        0 U li        UR                  5       U lk        U R                  R]                  5        H  u  nnUU R\                  U'   M     U R                  Re                  5        H  u  nnUU Rd                  U'   M     U R                  R^                  Ra                  S
0 5      U lm        Ub  UR                  O0 U ln        [        5         [        R                  " S 5      " [        5      U lr        0 U ls        [-        5       U lt        [-        5       U lu        0 U lv        [-        5       U lw        [-        5       U lx        [-        5       U ly        [        R                  " 5       U l|        SU l}        [        5       U l        0 U l        g )N)is_inferencer   FTcpumutated_named_buffers)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)super__init__get_decomp_fnexample_inputsdecide_layout_opt
layout_optnum_channels_last_convr  is_backwardis_const_graphconst_wrapper_codeconst_kernel_codeconst_moduleinputs_to_check_defers_input_alignmentextra_tracebackr"   reuse_shape_env
_shape_envdeferred_runtime_assertscopyras_by_symbolr(   r   r   bound_unbacked_symbolsrW   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalpartition_mapszero_dim_cpu_tensor_listdevice_typesdevice_idxsdevice_typer   additional_buffer_depsadditional_star_depsbuffer_to_padded_sizebuffers
operationsconst_output_indexr   folded_constants	constantsnamed_buffersr   r   r  named_parameterstorchbind_constantsopaque_value_type_classesseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_bufferssdpa_constraint_cachenever_reuse_buffersinplaced_to_remove
device_opswrapper_code&torch._inductor.extern_node_serializerr  r+   	is_fbcodeextern_node_serializercurrent_nodelistsmutated_inputsmutated_input_idxsname_to_bufferlistname_to_users
name_to_optimecreation_timenamecpp_wrapper
fx_wrapperrecord_multi_kernel_choicemulti_kernel_to_choiceaot_modegraph_idnext_post_grad_graph_counterpost_grad_graph_id	schedulerautotuning_inputsautotuning_mappingautotuning_gridscurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler  allocated_constant_namer4   r   	lru_cacher1   effectful_opsunaligned_buffersno_fuse_buffer_namesbuffer_layout_constraintslow_precision_codegen_opsinvoke_quant_opsall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxr[   bw_donated_idxsdep_size_hint_cache)selfgmr  	shape_envrR  rM  rQ  r  rA  r  r  r  r-  r  r  r  rL  r  rN  r  r  kv	__class__s                          r|   r  GraphLowering.__init__g  sB   . 	*, % '''F 	
 '(#(&,"4!2(.',$$ 
I#(D #'D # ..335 	 '1&>&@#(3,.QS=?">B9C%)5L%%:< 	 )5L$$*, 	 !BMC
# ALJ@W! <>"(*.0"4" 	
 " )..01 	 '3L"" 	 +7L&&B 	 @Bww{{#Z\@
" .:L))r 	
  	  ;=&68.03=<0:8B$0:=?"4>L 3=<-126V !!&< #, 	# ,0+-
/9|-/46@KD@Q35!YY[	&$
 +6'68#  "&'?"@>B =ADH7; 48 7;ooD002:< 	' !+,G+H I+J288+T("288T-M-MN !  	 6:& GI -/[[]LL..0DAq$%Dq! 1LL113DAq'(D!!!$ 4040@0@0D0D.1
- 5A4LL00RT 	$ 	"#$-$7$7$=>R$S!;= 3=,5?\! EG&:D,&1; :D% &OO-  "/1 AC r~   c                8    U R                   R                  5         g rw   )r  freeze_runtime_assertsrv  s    r|   r~  $GraphLowering.freeze_runtime_asserts=  s    ..0r~   c                Z   U R                   (       a2  [        UR                  5       5      [        UR                  5       5      4$ SSKJn  U" S[        U R                  R                  5       35      nU R                  R                  UU5      u  nnnU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R                  OUPM<     nnU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R                  OUPM<     n	nX4$ s  snf s  snf )z
Support dynamic shapes and dynamic strides by assigning variables
to each dimension.  We duck-shape tensors, so if two tensors
have the same size they get assigned the same symbolic variable.
r   )ConstantSource__inductor_unknown_tensor_)r  rX   sizestridetorch._dynamo.sourcer  r   r  backed_var_to_val,create_symbolic_sizes_strides_storage_offsetr   r   SymIntr   expr)
rv  exr  sourcer  r  _r   r_sizer_strides
             r|   symbolic_sizes_strides$GraphLowering.symbolic_sizes_strides@  s    ,RWWY79R		:   < $,S1R1R-S,TUF LL	 NRRTAu||!<!<!&&++!CTROUVv!:a#>#>AFFKKAEvV SVs   AD#AD(c                    UR                  5        Vs/ s H  n[        R                  " U5      PM     nnUR                  5        Vs/ s H  n[        R                  " U5      PM     nnX44$ s  snf s  snf )z
Primarily used to weights
)r  r   r   r  )rv  r  r   r  r  s        r|   static_sizes_strides"GraphLowering.static_sizes_stridese  sZ     +-'')4)Qa )4,.IIK8Kq%--"K8| 58s    A, A1c                r   [        U[        R                  5      (       a  UR                  n[        U[        R                  5      (       a  UR                  n[        U[        R
                  5      (       a3  UR                  U R                  ;   a  U R                  UR                     $ UR                  5       $ rw   )	r   r,   rF   datarE   ComputedBufferrL  r*  get_size)rv  r   s     r|   get_allocation_size!GraphLowering.get_allocation_sizeo  s     dBLL))99DdBMM**99DtR..//		T777 --dii88==?"r~   c                    UR                  5       nU R                  U5      nUR                  nUR                  n[	        X4U5      $ rw   )
get_layoutr  r  offsetr   )rv  r   layoutr  r  r  s         r|   get_allocation_storage_size)GraphLowering.get_allocation_storage_size  s?     "''-.tVDDr~   c                r    [        U[        5      (       d   U5       eX R                  [        U5      5      ;   $ rw   )r   r.   r1   r?   )rv  r
   features      r|   has_featureGraphLowering.has_feature  s5    
 '>22;G;233OF4KLLLr~   c                   X4U R                   ;  a  SnU R                  R                  UR                  5      n[	        U[
        R                  5      (       a  SU R                   X4'   g UR                  5       (       a.  U R                  R                  UR                  5       5      (       a(  U(       a  UR                  5       nOUR                  5       nX0R                   X4'   U R                   X4   $ ! [         a     N+f = f)zS
Get the size hint for a dependency with caching to avoid expensive recomputation.
r   )ru  r!  r   rL  r   r,   NonTensorObjhas_unbacked_symbolsr  all_unbacked_explicitly_hinted	get_numelnumbytes_hint
numel_hintKeyError)rv  depcount_bytesresinps        r|   get_dep_size_hintGraphLowering.get_dep_size_hint  s     T%=%==C ##''1C#r//?@((#);<0022}}CCCMMOTT"!//1!nn. <?$$c%78''(:;;   	s   *AC4 C4 4
D Dc                B    U R                   =n(       a  U$ [        S5      e)NzNo current device)rZ  r   rv  r
   s     r|   get_current_device_or_throw)GraphLowering.get_current_device_or_throw  s$    (((6(M233r~   c              #  \   #    U R                   nXl          S v   X l         g ! X l         f = f7frw   )rZ  )rv  r
   r   s      r|   set_current_device GraphLowering.set_current_device  s*     ##$	("'%s   ,! ,),c                L    U R                   (       a  gU R                  (       a  gg)N	inferencebackwardforward)r  r  r  s    r|   get_training_phase GraphLowering.get_training_phase  s    r~   c                  [         R                  (       d  g[         R                  (       a  gU R                  R                   Vs/ s HA  o"R
                  [        R                  R                  R                  R                  L d  M?  UPMC     nnU R                  R                   H&  n[        U5      (       d  M  UR                  U5        M(     [        U5      nUS:X  a  g[        R                  R                  R                   (       aE  [        R                  R                  R#                  5       (       a  [%        S U 5       5      (       a  g[        ['        U R                  R                  5      5      SU-  :  a  [(        R+                  S5        g[-        S U 5       5      (       a  [(        R+                  S5        gSS	 jnSS
 jnSS jnU(       a  [/        [0        5      nU HQ  n	[3        U	5      n
U
c  M  U" U	5      (       a  SnO"U" U	5      (       a  SnOU" U	5      (       a  SnOSnX==   U
-  ss'   MS     [(        R+                  S5        SnSnSnSn[5        UR7                  5       5      nUS   U-  US   U-  -   US   U-  -   US   U-  -   nUU:*  nU(       d  [(        R+                  SUU5        U$ [-        [9        XS5      5      (       a  [(        R+                  S5        g[-        [9        Xc5      5      (       a  [(        R+                  S5        g[%        [9        Xs5      5      (       a  [(        R+                  S5        ggs  snf )zT
Decide if we should enable layout optimization for this graph based on
heuristics.
FTr   c              3     #    U  HF  nS   H<  nUR                   U   R                  S   R                  R                  [        ;   v   M>     MH     g7f)r   r*   valN)rz   r   r
   typerc   r   nr   s      r|   r   2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sJ      #A!C s  '..337OO! P#s   AAi,  z*Skipped layout opt because only a few convc              3     #    U  H4  nS   H*  n[        UR                  U   R                  S   5      v   M,     M6     g7fr  )r   rz   r   r  s      r|   r   r    s?      
 QVVC[--e455 6s   <>zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670c                    U R                   S   R                  S   n[        U[        R                  5      (       d   eU R                   S   S:  =(       a    UR                  S5      S:  $ )Nr*   r  r	  )rz   r   r   r   r   r  )r  meta_vals     r|   
is_grouped3GraphLowering.decide_layout_opt.<locals>.is_grouped  sT    vvay~~e,Hh555566":>:hmmA&6&::r~   c                    U R                   S   R                  S   R                  S5      S-  U R                   S   R                  S   R                  S5      :*  =(       a.    U R                   S   R                  S   R                  S5      S:  $ )Nr*   r  r      rz   r   r  r  s    r|   is_in_out_channel:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel  sv    q	u%**1-1QVVAY^^E5J5O5OPQ5RR 6FF1INN5)..q1A5r~   c                    U R                   S   R                  S   R                  S5      S:*  =(       a.    U R                   S   R                  S   R                  S5      S:*  $ )Nr*   r  r   @   r  r  s    r|   is_small_channel9GraphLowering.decide_layout_opt.<locals>.is_small_channel	  sT    q	u%**1-3 8FF1INN5)..q1R7r~   groupedsmallin_outr   zConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)r  r   r   bool)r  r   r   r  )r+   layout_optimizationforce_layout_optimizationr^  r   r   r   r   r   r   r   r   r   r   backendsr   enabledis_availableallrG  logdebugr   r   floatr:   r   valuesmap)rw  r  r  
conv_nodesnconvr  r  r  flop_countsr   counted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                      r|   r  GraphLowering.decide_layout_opt  s    ))++ xx~~
%!UYY^^5O5O5W5W)WA~ 	 
 Aa  !!!$   JA: NN!!))%%2244 #   
 tBHHNN#$e3IIBC 

 
 

 IIw 	;
		 ,7,>K" .t 4 (d## )I%d++ 'I&t,, (I )I&-7& #  		67
 "'!& %$k0023K I&);;g&)99:h'*;;< i(+==>  +k9M 		~"
 ! & s:*++IIX  s$122IIT  s#011IIVWg
s   >M	M	c                B    U R                   b  U R                    SU 3$ U$ )z2Prepend the given name with the graph name if any.r  )rL  rv  rL  s     r|   qualify_nameGraphLowering.qualify_namei  s&    99 ii[$((r~   c                    [        U UUU R                  U R                  U R                  U R                  U R
                  U R                  U R                  U5      S9
$ )a  
Make a subgraph of the current graph with all inherited parts, except
the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
separately and lifted into a separate function in the parent output
wrapper code.  The subgraph name is qualified by the parent graph's
name. Note that the lifting of subgraph is supported for python wrapper
only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
)
parentrw  r  rx  rM  rQ  rA  r  r  rL  )SubgraphLoweringr  rM  rQ  rA  r  r  r  )rv  rw  r  subgraph_names       r|   make_subgraphGraphLowering.make_subgrapho  sZ      )oo((]]#'#>#>**((""=1
 	
r~   c                2   Sn[         R                  R                  R                  R                  /n[
        [           " 5       n[        U R                  R                  R                  5       H  nUR                  [         R                  R                  R                  R                  L a  UR                  U5        Uc  UnMV  UR                  U;   a  Mh  [        U5      (       a  UR                  U5        M  UR                   H  nXS;   d  M
  UR                  U5          M     M     U R                  R                  R                   HK  nUb  XA:X  a    U$ XC;   d  M  UR                   H&  nUR                  U;   a  M  UR                  U5        M(     MM     U$ )a  
The rule to decide if an node prefer channels last is simple.
1. if it's input/output of a convolution
2. if one of its user prefers channels last

We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
channels last.

Consider the scenario: conv -> batch-norm -> relu -> conv
Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
1. the output of batch-norm should be channels last initially since its input is a conv's output.
   Forcing the batch-norm's output to be contiguous results in the first copy
2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
   We need convert it to channels last layout which results in the second copy.
With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
can be saved.
N)r   r   r   bmmr   r(   r%   r   rf  r^  r   r   r   r   r   users)rv  	last_convnodes_cannot_propagate
output_setr  users         r|   r[  -GraphLowering.find_nodes_prefer_channels_last  sC   & 	"')).."4"4"<"<!=%'
$++++112Axx599>>55===q!$ !Ixx11a  q!%NN1%   3< ""((A$  GGD{{&<< NN4( $ ) r~   c                    XR                   ;  a2  U R                   R                  U5        [        R                  SU5        g g )NzUsing FallbackKernel: %s)r]  r   perf_hint_loginfor  s     r|   warn_fallbackGraphLowering.warn_fallback  s8    ,,,!!%%d+94@ -r~   c                `   U R                   R                  UR                  5        UR                  b%  U R                  R                  UR                  5        [
        R                  R                  (       a8  XR                  ;  a(  [
        R                  R                  U R                  U'   g g g rw   )	r%  r   r  indexr&  rf   r^  rB  rc  r  s     r|   add_device_infoGraphLowering.add_device_info  sw    fkk*<<#  .77F2J2J$J/0ww/C/CD$$V, %Kr~   c                "    [         R                  $ rw   )rf   	fake_moder  s    r|   r  GraphLowering.fake_mode  s    {{r~   c           	        XR                   ;   a  U R                   U   $ XR                  ;   a  U R                  U   $ XR                  ;   ay  [        R                  R                  U   n[
        R                  " U[
        R                  " UR                  UR                  /[        R                  R                  U5      Q76 S9$ g NrL  r  )rF  r!  r/  rf   r^  r,   ConstantBufferr>   r
   dtyper  )rv  buffer_namer  s      r|   try_get_bufferGraphLowering.try_get_buffer  s     ---&&{33+++$$[11..(77$$[1D$$ ~~KK./gg.J.J4.P  r~   c                    [        S5      e)Nz'Should not be called for the main graph)r   )rv  symbols     r|   add_symbol_graph_input$GraphLowering.add_symbol_graph_input  s    DEEr~   c                J    U R                  U5      nUb  U$ [        SU 35      e)Nz$Failed to find buffer matching name )r  r   rv  r  bufs      r|   
get_bufferGraphLowering.get_buffer  s1     !!+.?JA+OPPr~   c                   XR                   ;   a  U R                   U   R                  $ [        U R                  S5      (       a  XR                  R                  ;   aq  U R                  R                  U   nX R
                  ;   a  U R
                  U   R                  5       $ X R                  ;   a  U R                  U   R                  5       $ XR
                  ;   a  U R
                  U   R                  5       $ XR                  ;   a  U R                  U   R                  5       $ [        R                  " SU5      nU(       a   U R                  UR                  S5      5      $ [        SU 35      e)Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r*   could not find )r/  r  r   rV  r#  rF  	get_dtyper!  rematchgroupr  )rv  r  mutated_bufr   s       r|   r%  GraphLowering.get_dtype  s.   ..(>>+.444 DNN$899~~@@@..;;KHK111**;7AACC///((5??AA---&&{3==??+++$$[1;;==HHI;W>>!''!*--677r~   c                V   XR                   ;   a  U R                   U   R                  5       $ XR                  ;   a5  U R                  U   nUR                  5       (       d  gUR	                  5       $ XR
                  ;   a  U R
                  U   R	                  5       $ [        SU 35      e)Nr*   r$  )r/  numelrF  has_tensor_outputr  r!  r  r  s      r|   r  GraphLowering.get_numel  s    ..(>>+.4466---%%k2C((**==?"+++$$[1;;==677r~   c                j   > [        S5         [        TU ]  " U6 sS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.run)r   r
  run)rv  rz   r{  s     r|   r0  GraphLowering.run  s$    -.7;% /..s   $
2c                "   UR                   b
   SU 35       e[        U[        R                  5      (       d   eU R	                  S[        U R                  5       35      nU R                  R                  U5        XR                  U'   X!l         U$ )NzOperation registered twice: r   )	operation_namer   r,   	Operationr  r   r,  r   rI  )rv  r   rL  s      r|   register_operation GraphLowering.register_operation  s      (M,H*MM("bll++++  2c$//&:%;!<=r" " r~   set_namec                  U R                  S[        U R                  5       35      nU R                  R                  U5        XR                  U'   UR                  5       nUb_  [        U[        R                  5      (       a/  UR                  5       (       a  U[        R                  " S5      :X  d  U R                  U5        U(       a  X1l        U$ )Nr  r  )r  r   r+  r   rF  
get_devicer   r,   r  is_zero_elementsr   r
   r  rL  )rv  bufferr8  rL  r
   s        r|   register_bufferGraphLowering.register_buffer&  s      3s4<<'8&9!:;F#$*D!""$ 62#4#455++--ell511   (Kr~   c                h    U R                  SSR                  U5      -   5      nXR                  U'   U$ )Nlist_r  )r  r   rC  )rv  operation_namesrL  s      r|   register_operation_list%GraphLowering.register_operation_list:  s1      388O+D!DE*

4r~   c                *   ^ ^ SUU 4S jjmT" U5        g )Nc                  > [        U [        [        45      (       a  U  H  nT" U5        M     [        U [        R                  5      (       a6  U R                  5        H!  nTR                  U   R                  U 5        M#     g g rw   )r   rG  tupler,   rF   get_read_namesrH  r   )valuex	read_nameregisterrv  s      r|   rK  1GraphLowering.register_users_of.<locals>.register@  sg    %$//AQK %..!&!5!5!7I&&y188? "8 /r~   )rH  Iterable[ir.IRNode] | ir.IRNoder   Nonerx   )rv  node_outputrK  s   ` @r|   register_users_ofGraphLowering.register_users_of?  s    	@ 	@ 	r~   c                    [        U[        5      (       d   eU R                  R                  U5        XR                  ;  a  gU R                  U    H  nUR                  5         M     g)zz
When a buffer is mutated we need to make sure all the reads to
the old version are realized before the mutation happens.
N)r   strr9  r   rH  realize)rv  rL  r  s      r|   mark_buffer_mutated!GraphLowering.mark_buffer_mutatedJ  sX    
 $$$$$  &)))&&t,DLLN -r~   c                   XR                   ;   a  XR                  ;   d
   SU-   5       e[        U R                   U   5      nX R                  R                  ;   a  U R                  R                  U   $ U R                  U   $ )z
In AOTI, module buffers may have been mutated during the tracing and compilation.
Thus we need to read from previously stored original buffers, to make sure the
generated model.so uses correct initial values.
z$Can not find the original value for )rg  r/  rZ   rf  r   )rv  rL  	orig_names      r|   get_original_value_of_constant,GraphLowering.get_original_value_of_constantX  s     3338N 	
2T9	
N 5T5Q5QRV5WX	 KK,,, KKY'	
 %	
r~   c                   [         R                  R                  (       d7  U R                  R	                  5        H  u  p4[        X$5      (       d  M  Us  $    Uc  S[        U R                  5       3nUnUS   R                  5       (       a  SU 3nU R                  U5      n[        U5      nUnSnXR                  ;   a  U SU 3nUS-  nXR                  ;   a  M  X R                  U'   UR                  < SUR                  < S[        UR                  5       5      < S[        UR                  5       5      < S[        U5      S 3	U R                   U'   XPR"                  U'   U$ )Nconstantr   	constant_r  r*    rI  )r+   aot_inductoruse_runtime_constant_foldingr/  itemsr^   r   isdigitr  r`   r
   r  rF  r  r  hashr5  rg  )rv  rL  r  constant_namerH  rX  prefixcnts           r|   allocate_non_dup_const_name)GraphLowering.allocate_non_dup_const_nameh  sM   ""??(,(<(<(>$!$..(( )? <c$..123D	7??tf%D  &  %nn$XQse$D1HC nn$  $t{{oQtzznATYY[!$AeDKKM&:%=QDz!n 	D!
 .7$$T*r~   c                    U R                  X!5      n[        R                  " [        R                  " U[        UR                  UR                  /U R                  U5      Q76 S95      $ r  )	rg  rF   creater,   r  r>   r
   r  r  )rv  r  rL  new_names       r|   add_tensor_constant!GraphLowering.add_tensor_constant  s^    33D?"KK.2.G.G.M
 	
r~   c                  ^ U R                   T   R                  U:X  d  Uc  T$ [        R                  R                  R                  5          U R                  T SUR                   UR                  =(       d    S 3U R                   T   R                  U5      5      nX0R                   ;   d
   U S35       e[        U4S jU R                   5       5      (       a  U R                   U   U R                  U'   [        U4S jU R                   5       5      (       a  U R                   U   U R                  U'   UsSSS5        $ ! , (       d  f       g= f)z
We AOT copy constants to the devices they are needed on.
If device_override doesn't match the constant's device, then
copy it and return a different name.
Nr  r   z' should be in V.graph.constants alreadyc              3  @   >#    U  H  nT[        U5      :H  v   M     g 7frw   r`   )r   r  rL  s     r|   r   .GraphLowering.constant_name.<locals>.<genexpr>  s"      #5K {33#5   c              3  @   >#    U  H  nT[        U5      :H  v   M     g 7frw   rp  )r   
param_namerL  s     r|   r   rq    s"      "7J z22"7rr  )r/  r
   r   utils_python_dispatch_disable_current_modesrg  r  r  tor   r0  r1  )rv  rL  device_overridenon_dup_const_names    `  r|   rd  GraphLowering.constant_name  sE    >>$&&/9_=TK[[))@@B "&!A!A&/../0E0E0J/KLt$''8"
 &7 %&&MN7  #'#5#5   :>&:""#56  "&"7"7   =ANN&=%%&89 &= CBBs   C-E
Ec                H
  > U =R                   S-  sl         [        TU ]	  XU5      nU R                  U5      n[	        U[
        5      (       av  [        R                  R                  (       d  [        UR                  5      nOUR                  R                  nXPR                  U'   U R                  R                  U5        U$ [	        U[        [         ["        45      (       aA  [$        R&                  " U5      nXPR                  U'   U R                  R                  U5        U$ [	        U[(        5      (       a4  [+        XS9nX`R                  U'   U R                  R                  U5        U$ Uc  U R                  R                  U5        g [	        U[,        5      (       a  U R                  R                  U5        g [	        U[.        R0                  5      (       Ga  [3        [        R                  R4                  R6                  5      S:X  a  [9        [;        [        R                  R4                  R6                  5      5      R<                  [.        R>                  R@                  RB                  [.        RD                  RF                  RH                  4;   d   e[J        RL                  " XRN                  S9nXpR                  U'   U R                  R                  U5        U$ [Q        [S        U5      5      (       a?  [J        RT                  " XS9nXR                  U'   U R                  R                  U5        U$ [	        U[.        RV                  5      (       d   U5       eURX                  (       d  U R[                  U5      u  pOU R]                  U5      u  pU R                  (       ai  U R^                  (       aX  U R                   U R^                  ;   a>  [`        Rb                  " [e        U[g        URN                  URh                  X5      S95      nO=[`        Rb                  " [k        U[g        URN                  URh                  X5      S95      nXR                  U'   U R                  R                  U5        URl                  Rl                  U Rn                  U'   U R4                  R6                  (       a  U Rq                  URN                  5        [s        5          [u        U5      (       d  U Rv                  Ry                  U5        S S S 5        U$ ! , (       d  f       U$ = f)Nr*   rL  rH  )rL  r
   r  )=rs  r
  placeholderr  r   r$   rf   r^  r  r   r   r  r!  r   r   intr  r  r   sympifyr   rG   r   r   	Generatorr   rB  r  rS  iterr   _prims	rng_primsgraphsafe_run_with_rng_stater   higher_orderinvoke_subgraphr,   GeneratorStater
   r   r  OpaqueObjectStater   _has_symbolic_sizes_stridesr  r  rt  rF   rj  r=   r>   r  rA   r  r"  r  r_   ra   rj  r   )rv  r   rz   r{   exampler  r   gen
opaque_objsizesstridestensorr{  s               r|   r~  GraphLowering.placeholder  s    	!'%fF;""6*gx(( 77&&,W\\:||(((,f%""))&1K#tU!344==)D(,f%""))&1K!122!v=C(+f%""))&1J_""))&1g}-- ""))&111qww++112a7DQWW))//0=f&&CC		&&66=   ##GC(+f%""))&1J%d7m44--6IJ(2f%""))&1'5<<009'90
 22!66w?NE7!88ANE $$$$(<(<<%%&w~~w}}eUF %%&w~~w}}eUF %+&!%%f--3[[-=-=""6*""  0 12.w77&&**62 3  32 s   ,T
T!c           	     	  >^ T[         R                  L a3  [        US   [        [        [
        45      (       a  [        TU ]  TX#5      $ [        T[        R                  R                  5      (       d  [        TS5      (       a  T" U0 UD6$ T[        ;  Gau  [        T[        R                  R                  5      (       d
   T S35       eTR                  5       R                  S5      S   nU[         ;   a  [#        TSU R$                  SS9  O[&        R(                  (       a  [+        T/5      (       a  [,        O[.        n[0        R3                  SUR5                  TX#5      5        [7        TSS	9nUcF  [        R8                  R:                  R=                  T5      (       a  U R>                  (       a  [@        nO[7        TSS	9n[C        U5      n[#        TUU R$                  S
9  O)[+        T/5      (       a  [-        TX#5      e[/        TX#5      e [0        RE                  S[        T   5        U RF                  n	[I        T5      n
U
(       a  X#pU
[J        L a  SU	RL                  ;   ap  U	RL                  S   u  p[        T[        R                  R                  5      (       d   eSU4S jjnU" X5      u  pU" X#5      u  p#U" X5      u  p[K        X#X5      u  p#OU
" U	/UQ70 UD6u  p#SU	RL                  ;   a  [O        TSS9" U0 UD6nOS nT[P        ;   ab  T[R        RT                  ;  aN  [R        RT                  RW                  T5         [P        T   " U0 UD6n[R        RT                  RY                  T5        Uc*  T[        ;   a  [        T   " U0 UD6nO[O        TSS9" U0 UD6nU
(       a  U R[                  U	WWX#5        U$ ! [R        RT                  RY                  T5        f = f! [\         a  nS n[        U S5      (       ae  U RF                  bX  [        U RF                  S5      (       a=  U RF                  RL                  b&  U RF                  RL                  R_                  SS 5      n[a        UTX#US9Rc                  URd                  5      S eS nAff = f)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnr  override_decompz"Creating implicit fallback for:
%s)with_default)layout_constraintr  z  via %seager_input_valsc                v   > [         R                  R                  R                  TX5      nUc   eUS   US   4$ )Nr   r*   )r   r   operator_schemasnormalize_function)rz   r{   r   r   s      r|   	normalize.GraphLowering.call_function.<locals>.normalize  sC    %*XX%>%>%Q%Q &&F $*#55#5#)!9fQi#77r~   should_fallbackadd_to_fallback_setrB  r   stack_trace)r  )rz   r   r{   r   r   ztuple[Any, Any])3operatorgetitemr   rG  rF  dictr
  r   r   r   OpOverloadPacketr   rM   r   rL  r   rJ   rN   r  r+   implicit_fallbacksr   r8   r9   r  r  operator_strr   _libraryru  
is_builtinr  rQ   rR   r  rB  rO   rH   r   rK   rT   rf   active_user_lowering_opsr   discardpropagate_mutation	Exceptionr   r7   with_traceback__traceback__)rv  r   rz   r{   	base_nameerrortagdecided_constraintdefault_tagr  layout_constraintsold_args
old_kwargs	fake_argsfake_kwargsr  outer  r{  s    `                 r|   r   GraphLowering.call_function&  sS   X%%%*T!WtUD>Q*R*R7(>> &%**"="=>>71D
 D
 4*6**"fejj&;&;<< (/0< ++C03I//"&"4"4$(	 ** *6(33 .5 
 9&&vt<
 ,E, K,,77??(( + ' 1JT1K *B+)N&&8"&"4"4 $VH-- 0EE264HHS	8IIj)F"34!!A!9&!A!'+*%)BB
 *QVV3128J1K.	  *&%**2G2GHHHH8 2;91R.	'0'>/8/N,'@ )( $6a#I$#I&#ILD AFF*&v5I#  n,a&@&@@ ..226:C,V4dEfE22::6B ;*'/@@ /v5Q!%+ " ''8ZNJ% 22::6B&  	8Kn--%%1D--v66%%**6"//4488M#64[nQ__-48	8s3   D5O N: A(O :!OO 
R(BRRc                d    [        U R                  5      S:H  =(       a    U R                  S   S:*  $ )z=
True if this is a small constant attr that will be inlined.
r*   r      )r   shape)ts    r|   can_inline_constant!GraphLowering.can_inline_constant  s(    
 177|q 4QWWQZ1_4r~   c                b   [        U R                  U5      n[        U[        R                  R
                  5      (       aB  XR                  ;   a  U R                  U   $ [        R                  " XS9nXPR                  U'   U$ [        U[        R                  R                  5      (       a&  X@R                  U'   SU R                  U'   [        XS9$ [        U[        5      (       a&  X@R                  U'   SU R                  U'   [        XS9$ [        [!        U5      5      (       a&  X@R                  U'   SU R                  U'   [        XS9$ [        U[        R"                  5      (       d   e[$        R&                  R(                  (       d4  [$        R*                  (       d  [-        U5      (       d  XR.                  ;   a  U R1                  XA5      $ [3        5          UR4                  S:X  a6  [7        UR9                  5       UR:                  UR<                  S9sS S S 5        $ U R?                  U5      (       aO  [@        RC                  SU5        SSK"J#n  U" URI                  5       UR:                  UR<                  S	9sS S S 5        $  S S S 5        U R1                  XA5      $ ! , (       d  f       N= f)
N)rL  graph_moduler  r}  rx   )rH  r  r
   zInlining constant: %s r*   )r  )r  r
   )%r   rf  r   r   r   rm   r4  r,   Subgraphr   ScriptObjectr2  r5  rG   r   r   r  r   r+   r_  r`  always_keep_tensor_constantsrS   r  rl  r'   r  r<   itemr  r
   r  r  r  loweringr  tolist)rv  r   rz   r{   rH  r  r  s          r|   get_attrGraphLowering.get_attr  s    "$++v6eUXX1122,,,**622++6>C*-'JeUXX2233/4$$V,*,D'"<</00/4$$V,*,D'"<<DK((/4$$V,*,D'"<<%....<<22(//333++E::]{{b **,ekk%,, ]
 ''..		2F;,ellnEKKU ]
 /  ''66 ]s   =J !AJ  
J.c                    [         erw   AssertionErrorrv  r   rz   r{   s       r|   call_moduleGraphLowering.call_module      r~   c                    [         erw   r  r  s       r|   call_methodGraphLowering.call_method  r  r~   c                	  > [         TU ]  XU5      n[        U[        [        45      (       d  U4n[        U[        [        45      (       d   [        U5      5       eU Vs/ s H2  n[        [        U5      5      (       a  [        R                  " US9OUPM4     nn[        S U 5       5      (       d   U5       e[        R                  R                  R                  S   n[        U[        [        45      (       d  U4nU Vs/ s H"  n[        R                  R                  U5      PM$     nn/ n[!        U5      [!        U5      :X  d   e[#        XF5       GH\  u  p[        U[        R$                  [        R&                  45      (       d  UR)                  U5        MH  [        UR+                  5       [        R,                  5      (       a0  UR)                  [        R                  R/                  U5      5        M  [0        R2                  R                  R5                  U5      (       d   eU	R6                  S   R9                  5        V
s/ s H:  n
[        U
[0        R:                  5      (       a  U
R<                  R>                  OU
PM<     nn
UR)                  [        R@                  " X5      5        GM_     Xpl!        U RD                  RG                  5        GH  u  p[        U[H        [J        RL                  [0        R2                  R                  RN                  [0        R2                  R                  RP                  45      (       a  Ms  [        U[$        5      (       d   S[        U5       35       eURS                  5         [        U[$        5      (       d   eURT                  n[        U[        RV                  5      (       d   eUnURT                  n[        U[X        5      (       a  UR[                  5       U:w  d  GM)  [        R\                  R_                  XR`                  U   5         U RB                  Rc                  U5      nU R`                  U   U RB                  U'   GM     U Rg                  5         [h        Rk                  SU Rl                  U Rn                  b  U Rn                  5        g S5        g s  snf s  snf s  sn
f ! [d         a     GM  f = f)NrH  c              3    #    U  H  n[        U[        [        R                  [	        S 5      [        R
                  [        R                  [        R                  R                  R                  [        [        R                  [        R                  [        [        R                  [        R                   [        R"                  45      v   M     g 7frw   )r   rF   r,   r<   r  r  r   r	   logicboolalgBooleanr  EffectfulKernelrD   rG   OpaqueMultiOutputOpaqueValueTypeConstantr  )r   rI  s     r|   r   'GraphLowering.output.<locals>.<genexpr>  s      
& % KKJ%%JJKK''//&&,,#((..(( $ s   CCr   r  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr	  )8r
  r   r   rF  rG  r  r   r,   r  r  rf   r^  rB  rz   ExternKernelrealize_inputr   ziprF   BaseViewr   get_output_specCommBufferLayout
copy_inputr   	_inductoris_storage_and_layoutr   r  r  r   r  try_match_insignificant_stridesr  r!  ra  rG   r   Basicr  r  rT  r  rE   rA   get_nameMutationLayoutSHOULDREMOVErealize_intor"  r  
ValueErrorfinalizer  r  r  rR  )rv  r   rz   r{   r   rI  fx_node_argsresult_correct_stridesrfx_nodesmeta_stridesrL  rH  value_storage_boxindr{  s                   r|   r   GraphLowering.output
  s    f5&5$-00YF&5$-00>$v,>0 
 4HQ3P3PB&&Q/VWW 	 
  
& '
 
 
 	( )	 
, ww++003,66(?L<BCFq"////2FC!#< CK///f3JAa",,!<==&--a0A--/1D1DEE '--boo.H.H.KL ))??BBBB %\\%0779  : $.a#>#>AFFKKAE9   
 '--66qG% 4, 4,,224KD#KKOO&&55OO&&88	  eY// 9$u+G/ MMOeY////JJEeR]]3333 %JJEe[11U^^5E5M--::55d;,,223DEC.2.H.H.ND&&s+9 5@ 			U''!]]6DMM	
 =?	
w
< D" P " s%   9R6)R;AS *7S
SSc                J    U R                    H  nUR                  5         M     g rw   )r+  decide_layout)rv  r  s     r|   r  GraphLowering.finalizew  s    <<C  r~   c              #  \   #    U R                   n Xl         S v   X l         g ! X l         f = f7frw   )rB  )rv  r   olds      r|   set_current_nodeGraphLowering.set_current_node{  s*     	$ $ #s   ,
! ,),c              #  P   #    U R                   n S v   Xl         g ! Xl         f = f7frw   r>  )rv  r  s     r|   set_current_wrapper_code&GraphLowering.set_current_wrapper_code  s$     	$ #s   & &#&c                  ^  [        U5      [        U5      :X  d   e[        U5      [        U5      :X  d   eUR                  [        R                  R                  R
                  L Ga  UR                  S   n[        U[        5      (       d   e[        R                  R                  R                  US   US   UR                  5        VVs0 s H@  u  pxU[        U[        R                  R                  5      (       a  UR                  S   OU_MB     snnUS   5      n	U	 HY  n
US   U
   nUS   U
   nXL a  M  T R!                  [        R                  R"                  R$                  R&                  X40 5        M[     g[        UR                  [        R(                  R*                  5      (       d   e        SU 4S jjnUR                  R,                  n[/        [1        X$5      5       H   u  nu  pUR2                  U   nU" UX5        M"     UR2                   Vs0 s H  nUR4                  U_M     nnU H  nUU   nUU   nUU   nU" UX5        M     gs  snnf s  snf )	aP  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
and then called fx_node(*new_args, **new_kwargs).

If fx_node mutates any of new_args/new_kwargs, and they are different from
old_args/old_kwargs, then we need to update the original tensor.
r{   
kernel_idxconstant_args_idxr  tma_descriptor_metadataNc                b  > XL a  g U R                   b  U R                   R                  (       a  [        U[        R                  5      (       a  U4nU4n[        X5       HK  u  p4X4L a  M  TR                  [        R                  R                  R                  R                  X440 5        MM     g g g rw   )
alias_infois_writer   r,   IRNoder  r   r   r   r   copy_r   )
schema_argold_argnew_argold_arg_itemnew_arg_itemrv  s        r|   maybe_propagate9GraphLowering.propagate_mutation.<locals>.maybe_propagate  s     !$$0Z5J5J5S5S gryy11&jG&jG25g2G.L#3 &&		,,44|6RTV 3H 6T0r~   )r  ztorch._C.Argumentr  	ir.IRNoder  r  r   rN  )r   r   r   r   r  triton_kernel_wrapper_mutationr{   r   r  r   r   get_mutated_tensorsra  r   r%   r   r   r   r  r   r   r   _schemar   r  	argumentsrL  )rv  r  r  r  new_args
new_kwargsr{   ry  rz  mutatedrL  r  r  r  schemar   r  argschema_kwargskeys   `                   r|   r   GraphLowering.propagate_mutation  sA     8}H---:#j/111>>UYY33RRR^^H-Ffd++++--@@TT<(./ !' . 
1ehhmm(D(Dqvve}!K . 45G  $X.t4$X.t4%""599>>#7#7#?#?'ASUWX   '..%***?*?@@@@	)	4=	HQ		( '''0X1H'I#C#'))#.JJ9 (J 392B2BC2B332BCC oG oG&s+JJ9	 YT Ds   AI I!c                X    U R                   R                  S0 5      R                  S5      $ )z:Get the user-annotated stream index from FX node metadata.customstream)r   r   r  s    r|   _get_node_streamGraphLowering._get_node_stream  s$     vvzz(B'++H55r~   c                   U R                  U5      nUR                   Hg  nU R                  U5      nXB:X  a  M  U R                  R                  U5      n[	        U[
        R                  5      (       d  MW  UR                  5         Mi     g)a.  Realize IR inputs that are on a different stream.

Without this, pointwise ops across stream boundaries would be inlined
into each other during lowering, making it impossible for the scheduler
to split them into separate kernels.

None means the default stream, so it is compared like any other value.
N)r%  r   envr   r   r,   rF   rT  )rv  r  node_stream
input_nodeinput_streamir_values         r|   $_realize_inputs_at_stream_boundaries2GraphLowering._realize_inputs_at_stream_boundaries  sl     ++A.++J00<L*xx||J/H(BLL11  " ,r~   c                %  >^ ^^#^$^%^&^' S'U4S jjn      S(U%U&U 4S jjnSSK Jn  [        T R                  5      m$[        T R                  5      m'[        T/5      nTR                  S:H  nU(       a2  T R                  T5      u  pxU[        Xx5      -  nT R                  T5        [        R                  R                  U5         [        R                  R                  T R                  T5      5         T R                  T5         [         R                  " T5         TR                  S:X  a  TR"                  (       a  [%        TR"                  [&        R(                  R*                  5      (       a  [&        R,                  R.                  R1                  TR"                  5      (       aP  [3        T5      (       d  UR5                  SSU4S j5      (       a$  U" S	5        [7        TR"                  S
S9" W0 WD6n	GOXTR                  S:X  a  [%        TR"                  [&        R(                  R*                  [&        R(                  R8                  45      (       a4  [;        T5      (       a$  U" S	5        [7        TR"                  S
S9" W0 WD6n	GOTR                  S:X  a  TR"                  [&        R<                  R>                  R@                  L a  [B        RD                  S:w  a  U" S5        [B        RD                  S:X  a  Wn
WnTRF                  RI                  S5      =n(       a  US   nUS   n[K        UUUU5      u  pxO[M        T/UQ70 UD6u  pxT RO                  TR"                  Xx5      n	T RQ                  TXXx5        O[S        S[B        RD                   35      e[U        TR"                  5      (       a  U" S5        [%        TRF                  S   [&        RV                  [&        RX                  [&        RZ                  45      (       a$  TRF                  S   R\                  R^                  n	O'[`        T(T ]  T5      n	OU" S5        [`        T(T ]  T5      n	[&        R<                  Rd                  Rf                  Rh                  [&        R<                  Rd                  Rj                  Rh                  [&        R<                  Rd                  Rl                  Rh                  [&        R<                  Rd                  Rn                  Rh                  [&        R<                  Rd                  Rp                  Rh                  /m#[s        S TRt                   5       5      nTT Rv                  ;   m&[s        U#4S jTRt                   5       5      m%TRF                  RI                  SS
5      (       a  [%        U	[x        5      (       a  U	R{                  5         TRF                  S   R}                  5       n[&        R~                  R.                  R                  " U6 nU	R                  5       U:w  a=  U(       d6  [        R                  " U5      n[        R                  R                  U	U5      n	U(       aN  [%        U	[x        5      (       a9  [%        U	R                  [        R                  5      (       a  U	R{                  5         U(       d  T%(       Gad  [%        TRF                  RI                  S5      [&        R                  5      (       Ga+  T&(       a  T Rv                  RI                  T5      nOTRF                  S   R}                  5       nUGb  [        U5      S:  Ga  [B        R                  =(       d    T&(       + =(       a    T%(       + n[&        R                  R                  TRF                  S   5      n[        [        U5      5      S:  nU(       d~  U(       aw  [        U	R                  5       5      S:X  aZ  TT R                  ;   aJ  T&(       dC  T%(       d<  [        R                  R                  U	R                  5       [&        R                  5      nU(       d  [        U5      (       a  TRF                  S   R                  5       (       d)  [%        U	R                  [        R                  5      (       a4  [        R                  R                  U	[        R                  " U5      US9n	OM[        U	R                  5       5      S:X  a  [        U5      S:  a  / n[        R                  R                  U	UUS9n	[        [        TRt                  5      5      nUS:  Gax  [%        U	[x        5      (       Gab  TRt                   GHD  nUR"                  [        ;   Ga  U	R                  5         [&        R<                  Rd                  R                  Rh                  [&        R<                  Rd                  R                  Rh                  [&        R<                  Rd                  R                  Rh                  /n/ nT R                  (       d=  UR                  [&        R<                  Rd                  R                  Rh                  5        [&        R                  R                  (       Ga  U[&        R<                  R                  R                  Rh                  [&        R<                  R                  R                  R                  [&        R<                  Rd                  R                  Rh                  [&        R<                  R                  R                  Rh                  [&        R<                  R                  R                  R                  [&        R<                  R                  R                  R                  [&        R<                  R                  R                  R                  /-  nU[&        R<                  R                  R                  Rh                  [&        R<                  R                  R                  R                  [&        R<                  R                  R                  R                  [&        R<                  R                  R                  Rh                  [&        R<                  R                  R                  Rh                  [&        R<                  R                  R                  R                  /-  n[&        R                  R                  (       a2  U[&        R<                  R                  R                  Rh                  /-  nUR"                  U;   aN  [        R                  R                  U	[        R                  " TRF                  S   R}                  5       5      SS9n	UR"                  U;   af  TUR                  S   L aT  [        R                  R                  U	[        R                  " [        TRF                  S   R                  5      5      5      n	UR                  S:X  d  GM  [%        U	R                  R                  [        [        45      (       d  GM  [B        R                  (       a@  U	R                  R                  5       S:X  a"  U	R                  R                  5       (       d  GM4  U	R{                  5         GMG     U	R                  n[%        U[        5      (       d  [%        U[        R                  [        R                  45      (       aR  UR                  n[%        U[        5      (       d1  [%        U[        R                  [        R                  45      (       a  MR  [%        U[        5      (       a2  UR                  [        TRt                  5      5      (       a	  U" U	T5      n	U	R                  [        TRt                  5      5        [%        U	[x        5      (       a.  U	R                  5       (       a  U" U	T5      n	U	R                  5         [%        U	[x        5      (       an  [%        U	R                  [        5      (       aO  U	R                  R                  n[%        U[        5      (       a$  UR                  SS9(       a  U	R{                  5         SSS5        SSS5        SSS5        SSS5        [        W	T5        T R                  U	5        [
        [        R                     " 5       nT R                  T$S  H  nUUR                  5       -  nM     T R                  T'S  H  nUUR                  5       -  nM     [         R                  GR                   GR                  nTR                  S:X  aH  [%        U	[        R                  5      (       a)  UGR                  U	5      (       a  UGR                  U	5        S)U$U'U 4S  jjn [         R                  GR                  (       a  TR                  S:X  a  U	$ G[        [         R                  GR                   GR                  TRF                  RI                  S!0 5      5      n!U!c   e[        S" U! 5       5      n"UU":  d'   S#U S$U" S%TGR                  5        S&U " 5        35       eT GR                  TU5        U	$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN$= f! , (       d  f       GN.= f)*z4Lower and execute a single FX node into Inductor IR.c                Z   > [         R                  S[        TR                  5      U 5        g )Nzlowering %s %s)r  r  r   format_node)msgr  s    r|   r  %GraphLowering.run_node.<locals>.debug  s    II&
1==(A3Gr~   c           	       > [         R                  R                  UR                  S   5      nUR                  S   R	                  5       n[        [        U5      5      S:  nU(       d  U(       a  [        U R                  5       5      S:X  ar  UTR                  ;   ab  T(       d[  T(       dT  [        R                  R                  U [        R                  " [        UR                  S   R                  5      5      5      n U $ )Nr  r      )r   _prims_common%is_non_overlapping_and_dense_or_falser   r  r   r   r  r\  r,   r  require_stride_orderget_stride_orderr   r  )r   r  denser  unbacked_symbols_in_stridesis_input_for_as_stridedis_user_visiblerv  s        r|   &maybe_apply_channels_last_stride_orderFGraphLowering.run_node.<locals>.maybe_apply_channels_last_stride_order  s     ''MMuE ffUm**,G*-.CG.L*MPQ*Q'/)*a/888'/==''6qvve}7J7JK Mr~   r   )CompilerBisectorr   inductorrM   c                    > [        T 5      $ rw   )reprr  s   r|   <lambda>(GraphLowering.run_node.<locals>.<lambda>*  s	    ar~   rK   Fr  flexible_layout-user_defined_triton_kernel_layout_constraintsneeds_fixed_stride_orderr  r*   z1Unknown triton_kernel_default_layout_constraint: r   r  r  c              3  >   #    U  H  oR                   S :H  v   M     g7f)r   Nr   )r   r  s     r|   r   )GraphLowering.run_node.<locals>.<genexpr>~  s     DGDGGx/Gs   c              3  @   >#    U  H  oR                   T;   v   M     g 7frw   )r   )r   r  as_strided_opss     r|   r   rJ    s      *:A$~-'rr  inductor_realize_to_stridesNr5  )allow_paddingTr   d   )	thresholdr~  c                    > TR                   TS   V s/ s H  n SU R                  5        SU  S3PM     nn UR                  S TR                  TS   5       5        SR	                  U5      $ s  sn f )Nunbacked_symbol_defs= in:

c              3  P   #    U  H  nS UR                  5        SU S3v   M     g7f)rR  rS  rT  N)get_unbacked_symbol_defs)r   r   s     r|   r   BGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>O  s1      ?B ((C(C(E'FfRDPRS?s   $&z***
)r+  rV  extendr,  r   )r  r  buffer_watermarkoperation_watermarkrv  s     r|   format_new_defs/GraphLowering.run_node.<locals>.format_new_defsJ  s      <<(8(9::C ((D(D(F'GvcURTU:   HH //*=*>?  <<?"s   !A,unbacked_bindingsc              3     #    U  H8  n[         R                  R                  R                  R	                  X5      v   M:     g 7frw   )rf   r  rx  unbacked_renamingsr   )r   r  s     r|   r   rJ    s5      /
& KK!!4488>>&s   A Azfailed  >= z (inductor >= fx)
fx node is: z
new operations are:

)r2  rS  r   rN  )r   r  r  r   r   r  r   rS  )!torch._inductor.compiler_bisectorr@  r   r+  r,  r(   r   fetch_args_kwargs_from_envrY   r-  r,   r  current_originscurrent_stream_idxr%  r  rf   r   r   r   r   r   r  ru  r  rL   disable_subsystemrK   HigherOrderOperatorrb   r   r  r  r+   'triton_kernel_default_layout_constraintr   r   rH   rI   r   r  r   r   r  SymFloatSymBoolr   r  r
  run_noder   
as_stridedr   as_strided_as_strided_scatterresize	resize_asr   r  r   rF   rT  r  r  any_is_symbolicmaybe_get_strider9  r  r8  r  r  r   r   r6  r7  r   r  r\  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesrP   realize_hintr   mm_int_mmr  r   r   r   _has_mkldnnr   _linear_pointwiser   mkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensorr   r    _convolution_transpose_pointwiseqconv_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrz   r   r  rB   rC   delay_realize_cheap_outputs	num_readshas_large_inner_fnrE   
MutableBoxshould_realize_on_reuse
mark_reusehas_exceeded_max_readsr;   rP  r   r   rV  r^  r  rx  is_unbacked_symintr   r  r    r1  create_deferred_runtime_asserts))rv  r  r  r>  r@  originsis_call_functionrz   r{   r   r  r  r  inp_args
inp_kwargs	is_outputr  sym_stridesstride_orderrN  r:  r;  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layout_datacurrnew_unbacked_defsr  r   rx  r[  r]  renamed_unbacked_bindingsrL  rY  r<  r=  rZ  r{  s)   ``                                 @@@@@r|   rk  GraphLowering.run_node  s   	H		"/		 	0 	Gt||,!$//2 $.qc?44?2::1=LD~d33G55a8II%%g.II(()>)>q)AB!!!$q! 'HHqxx)>)>??NN((33AHH==9!<<'99"K 
 ())!((N
 'HHuzz44ejj6T6TU  /q11 ())!((N
 'HH		 6 6 U UUBBFWWEFBB12  $H!'J+,66::6H+II'I#3A#6%5a%8
'@ "$&(f (?q'R4'R6'R!//$GF++AxTR&KFLzLzK{|  !** '(FF5MELL%..%--#P  VVE]//44F"W-a0Fb	)!, 		))11		**22		1199		%%--		((00N DAGGDDI4#C#CCO&) *:;''* '# vvzz7??J	E E  &&-..0#oo33CCWM**,7#%#6#6w#?L__AA&,WFvy11v{{BKK88  4*

5!5<<; ; #">>BB1EGffUm224G&3w<!+;**A/.A%655 " "//UUuE 1':;a? 0 8! 12a7!@!@@ / 7"$"3"3"T"T"OO-u/B/B# 73w<< 66%=1133z"KKKK8 8 &(__%I%I & " 3 3G <.; &J &F  #6??#45:s7|a?O*,%'__%J%J &} &K &F Jqww/0I1}FI!>!>GGD{{&;;++- "IINN??GG!IINN--55!IINN22::-)
 ;=7#-44UYY^^5O5O5W5WX 88///- %		 0 0 B B J J %		 0 0 B B I I %		 ? ? G G %		 0 0 B B J J %		 0 0 B B I I %		 0 0 B B I I %		 0 0 B B P P2 - < %		 0 0 G G O O %		 0 0 G G N N %		 0 0 H H O O %		 0 0 Q Q Y Y %		 0 0 @ @ H H %		 0 0 B B I I@ ;  %xx// 1eiimm6O6O6W6W5X X 1;;*;;%'__%I%I & " 3 3AFF5M4H4H4J K.2 &J &F !KK+JJ !TYYq\ 1%'__%I%I & " 3 3$B166%=CVCV$W!"&F ww(*%fkk&6&6I8NOO !' B B$*KK$9$9$;q$@(.(F(F(H(H ("NN,S $V $UJ77JBKK7= = "JJE %UJ77JBKK7= =
 eZ00U5R5RL6 6 DFANF !!#agg,/ &),,1N1N1P1P @J##% &),,FKK1T1T{{''dI.....=(k " % C /t 	61%v&&u||46<< 0 12C!=!=!?? 3//"5"67B!<!<!>> 8 GG$$..	
 DDM!65<<00,,V44!!&)		# 		# 77144=#8M. 6GG&&

3F(K
 !,,, %/ /
&/
 %
!
 !$== 	
'(-F,G H==?+ ,&&5&7%8:	
=
 	,,Q0ABY "! %$ CB /.s   /AK3AJ1AJs4AJ	-AJ	D
AJ	D1AJ	BAJBAJ1BAKJ
AJJAJJ
AJ.J)AJ1J1
AK 	J;AKK
AKc                  ^  [         R                  (       a  g S	U 4S jjn[        5       (       a|  UR                  [        R
                  R                  R                  R                  L aA  T R                  (       a0  T R                  U5      u  pEUS   S:w  a  U" US   US    S35        g g T =R                  U-  sl        [        R                  R                  R                  nU GH[  nT R                   R#                  U/ 5      nUR$                  U   n	UR'                  5       R)                  U	5      (       d{  S
S jn
U
" U	R*                  5      (       a$  U" XyR*                  :  U SU	R*                   35        U
" U	R,                  5      (       a$  U" XyR,                  :*  U SU	R,                   35        U H  n[/        UR0                  5      nUT R                  -
  nU(       a;  [3        U[4        S9nT R                   R7                  U/ 5      R9                  U5        Mi  U" UR0                  UR0                   5        M     GM^     g )Nc                t   > [         R                  " X5      nTR                  USS9  TR                  U5        g )NTr7  )r,   AssertScalarr=  r5  )r  r2  	assert_oprv  s      r|   make_assertBGraphLowering.create_deferred_runtime_asserts.<locals>.make_assert  s2    2I  T :##I.r~   r   Tz to be Truec                `    U [         [         * 4;   a  g [        U 5        g! [         a     gf = f)NFT)r)   r  	TypeError)r  s    r|   is_convertibleEGraphLowering.create_deferred_runtime_asserts.<locals>.is_convertible  s5    & 11#()F#'( )#()s     
--r`  z <= )r   )r  r#   r2  rS  r   rN  )r  r	   r   r  )r+   do_not_emit_runtime_assertionsr   r   r   r   r   _assert_scalarr   rQ  rc  r  rf   r^  r  rx  r  r   var_to_range _default_unspecified_value_rangeissubsetlowerupperr   r  r   rS  r   r   )rv  r  r  r  	node_argsr  rx  i0rasvrr  rafvsmissingi1s   `              r|   r  -GraphLowering.create_deferred_runtime_asserts  s    00@	/ %&&EIINN99AAA::1=LI|t#IaLYq\N+*FG $ ''+<<'((22I (((,,R4++B/ AACLLRPP) &bhh//#B((Nrd$rxxj4IJ%bhh//#B((Nrd$rxxj4IJB/8C!D$?$??G c2**55b"=DDRH#BGGy: ) (r~   c                    [         R                  (       a  [        S5      e[        R                  S;  a  [        S[        R                   35      eg )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform )r+   disable_cpp_codegenr6   sysplatformr  s    r|   !validate_can_generate_cpp_wrapper/GraphLowering.validate_can_generate_cpp_wrapper  s@    %%()BCC<<;;(+@)OPP <r~   c                   U R                   R                  5       nUR                  S5        UR                  S5        [        U5      S::  d%   SR	                  SR                  U5      5      5       e[        U5      S:H  nU(       a  SOUR                  5       U l        U R                  (       a  U R                  5         [        U R                  5      U l        [        U R                  U R                  U R                  5      nUc   SU R                   S35       eUR                  UUUU5      U l        U R                   (       a0  U R                   R                  R"                  U R                  l        g g )	Nr  r   r*   zDoes not support mixing {}+r   zDevice z not supported)r%  r  r  r   formatr   r   r'  rM  r  r2   r=  r3   rN  rj  r>  r  _names_iter)rv  is_subgraphr  parent_wrapper_codepartition_signaturesr%  only_cpuwrapper_code_gen_clss           r|   init_wrapper_codeGraphLowering.init_wrapper_code  sP    ((--/U#V$< A% 	
'C'J'JHH\"(
 	
% |$)$,5,2B2B2D2241$2B2BC=d.. 
 $/ 	
d&&'~6	
/ 177 	
 ,0,=,=,J,J,V,VD) r~   c                   SS K nUR                  U R                  5      nUR                  U5      n/ nUR                  R                   HY  nUR
                  S:X  d  M  UR                  [        R                  R                  R                  L d  MH  UR                  U5        M[     / n0 n0 n/ n	0 n
U GH  nUR                  S    H\  nU HS  nX;   a  M
  [        U[        R                  R                  5      (       d  M5  [!        U5      X|'   UR                  U5        MU     M^     UR                  S   n[        R"                  R$                  R'                  UR                  S   UR                  S   UR)                  5        VVs0 s H@  u  pU[        U[        R                  R                  5      (       a  UR*                  S   OU_MB     snnUR                  S   5      n0 nUR                  R-                  U5         UR)                  5        H  u  pUU;   aJ  UR                  R/                  [        R0                  U4S	9n[!        U	5      UU'   U	R                  U5        MU  X;   a	  X   UU'   Mc  [!        U	5      X'   U	R                  U5        X   UU'   M     S S S 5        UXR2                  '   GM     X-   nUR                  R                   H&  nUR
                  S
:X  d  M  [5        U5      4Ul          O   UR9                  5         [        R                  R;                  U5      nUR=                  U5      n[!        U5      S:  a  U[!        U	5      S  n0 U l        U H  nSn/ nUR                  S    H|  n/ nU HW  n[        U[        R                  R                  5      (       d  UR                  U5        M?  SnUR                  UX|      5        MY     UR                  [5        U5      5        M~     U(       d  M  UU R>                  UR2                  '   M     US [!        U	5       U l         Xl!        g s  snnf ! , (       d  f       GN= f)Nr   r   gridr{   r  r  r  r  )rz   r   FT)"r  deepcopyre  r^  r   r   r   r   r   r  r  r   r{   r   r   r%   r   r   r   r  ra  r   inserting_beforer   clonerL  rF  rz   	recompileInterpreterr0  rY  rW  rX  )rv  r  r  	cloned_gmtriton_nodesr   grid_inputsvisited_gridstriton_inputskwargs_inputsvisited_kwargsr  r  r{   ry  rz  r  r  new_nodenew_outputsrunnerreturned_outputsgrid_outputsdynamic_grid	new_gridsnew_grids                             r|   extract_autotune_inputs%GraphLowering.extract_autotune_inputs	  s    	MM$,,/	~6OO))D?*KK599#9#9#X#XX##D) * ,.24(*-/)+ DF+C+ !#uxx}}55-0-=*#**3/   , [[*F--@@TTL)/0 !' . 
1ehhmm(D(Dqvve}!K . 56G *,J11$7"LLNDAG|#,??#@#@TUSW#@#X(+M(:
1%,,X6 *(6(9
1 (+M(:N%!((+$2$5JqM + 8 (2M))$M !P $1OO))Dww("";/1	 *
 	%%i0!::n5{a+C,>,@AL$&D!$$.0	 KK/D!H#)#uxx}}==$OOC0$'+ ]5G(HI  $ $$U8_5 0  <7@D))$))4 %" "22FC4F!G"/o 87s   AP8 BP>>
Q	c                  ^  [        U 4S jS 5       5      (       Ga;  SU 4S jjn[        R                  R                  (       a  [        R                  R                  (       ab  SnT R
                   H&  n[        U[        R                  5      (       d  M$  Sn  O   U(       a#  U" 5       nT R                  U5        [        T 5        T R                  5       $ T R                  (       d  T R                  5       $ ST l        T R                  5       R                  nU" 5       n[         R"                  R$                  R'                  5          U" U5        SSS5        AST l        T R(                  R+                  5         T R,                  R+                  5         T R.                  R+                  5         [0        R2                  R4                  R6                  R+                  5         [0        R2                  R4                  R8                  R+                  5         [:        R<                  " 5         [        R>                  " SS05         T R                  5       sSSS5        $ T R                  5       $ ! , (       d  f       GN = f! , (       d  f       g= f)	zA
For GPU, Triton kernels are autotuned and stored as cubin files
c              3  @   >#    U  H  oTR                   ;   v   M     g 7frw   )r%  )r   r
   rv  s     r|   r   9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>u	  s     Iv***rr  )cudaxpuc                   >     SS jn [         R                  R                  R                  5       nUb  [	        [
        R                  [        5      (       d  UR                  (       a  UR                  R                  5         UR                   Vs/ s H
  nUc  M  UPM     nn[        R                  " U[
        R                  5       Vs/ s H  nU " U5      PM     nnOU[	        [
        R                  [        5      (       a  TR                  O[
        R                   Vs/ s H  nU " U5      PM     nnTR                  (       a  SSKJn  [#        TR$                  5       VVs/ s H<  u  pxUTR                  ;   d  M  [	        XW   [         R&                  5      (       d  M:  UPM>     n	nnU	 H3  nXW   n
[	        U
[         R&                  5      (       d   eU" U
5      XW'   A
M5     U$ s  snf s  snf s  snf s  snnf )Nc                N   U c  g [        U [        R                  [        R                  45      (       a  U R                  R
                  $ [        U [        5      (       a  [        U 5      $ [        U [        R                  5      (       d   S[        [        U 5      5      -   5       eU $ )Nz&Unknown type when creating real inputs)r   r   r  ri  r   hintr   r   r   rS  r  )rI  s    r|   materializeXGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.materializex	  s     y##Aenn'EFF vv{{*#Az22%ay()!U\\:: Ds4PQ7|S:  !r~   r*   )clone_preserve_strides)rI  z,torch.SymInt | torch.SymFloat | torch.Tensorr   zint | float | torch.Tensor)r   _guardsTracingContexttry_getr   rf   real_inputsre   output_stridesclearparams_flatrp  chainr  rD  
compile_fxr  r   r!  r   )r  tracing_contextparamr  rI  r  r  r   rL  rE  mutated_inprv  s              r|   extract_real_inputsCGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputsw	  s   !C!/!" #(--">">"F"F"H".zMM;8 8 '55'66<<> &5%@%@#%@E  %@   # "+amm!L#!LA $A!L   #K  *!--EE !//!"/#/  $A/   # &&B *343D3D)E*)EIC4#6#66  '{'7F )E ' *  2 '2&6)+u||DDDD+A++N('  2 #"W#
##*s*   G&G&G+"G0%G5?G5"G5FTNztriton.autotune_at_compile_time)r    list[int | float | torch.Tensor]) r   r+   tritonautotune_at_compile_timeautotune_with_sample_inputsr,  r   r,   UserDefinedTritonKernelr  rt   codegenrQ  rM  compile_to_modulecallr   ru  rv  rw  r7  r  r6  r<  rf   r^  r  precomputed_replacementsinv_precomputed_replacementsr-   resetpatch)rv  r  user_defined_kernelsr   r  compileds   `     r|   codegen_with_cpp_wrapper&GraphLowering.codegen_with_cpp_wrappero	  s    IIIID#L }}55 ==<<+0("oo%b"*D*DEE370! . ,&9&;44[A8>||~%}}  <<>) $) 1138813[[11HHJ[) K $( $$**,''--/''--/  99??A  ==CCE\\#De"LM<<> NM <<>!! KJ NMs   	I(>I:(
I7:
Jc                    SSK Jn  [        R                  " SS5         U" U R                  5      U l         SSS5        g! , (       d  f       g= f)z
(Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
files should be generated (to avoid biasing any benchmarks and pessimizing
fusion decisions).
r*   )	Schedulerztriton.store_cubinFN)rV  r  r+   r	  r,  )rv  r  s     r|   _update_schedulerGraphLowering._update_scheduler	  s2     	)\\.6&t7DN 766s	   ?
Ac                P   [        SSS9   U R                  5         U R                  5         [        R                  R                  U R                  U R                  R                  5        U R                  R                  U 5        U R                  R                  5         [        R	                  S[        R                  R                  5        U R                  R                  U R                   5      nU R                  R#                  5         UsS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.codegenTlog_pt2_compile_eventzFFinished codegen for all nodes. The list of kernel names available: %s)r   r  r  rf   r  draw_orig_fx_graphre  rV  r   r>  push_codegened_graphr  r  r^  ro  generater  pop_codegened_graph)rv  r   s     r|   r  GraphLowering.codegen	  s    1N""$""$GG&&t||T^^5I5IJ2248NN""$IIX00
 &&//0A0ABF113! ONNs   DD
D%c                l   [        SSS9   UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR
                  U l        UR                  U l        U R                  5         U R                  R                  5         SSS5        g! , (       d  f       g= f)a  
This is a more compact version of the `codegen()` above
where we codegen this graph as a subgraph of some parent
graph. The parent graph is passed as an argument: the
intention is to inline codegening of the subgraph in
the parent graph's wrapper code (including the generated
kernels). The wrapper code is not finalized (via `.generate()`
call), as this will be done in the parent graph's `codegen()`.
zGraphLowering.codegen_subgraphTr  N)
r   r>  r=  rM  r%  r&  r'  r  rV  r  )rv  parent_graphs     r|   codegen_subgraphGraphLowering.codegen_subgraph
  s     :RVW , 9 9D*55DO+77D , 9 9D+77D+77D""$NN""$ XWWs   BB%%
B3c                    Sn/ n/ nU R                   R                   HL  nUR                  5       nX-  nUR                  XES-  45        UR                  XDR	                  5       45        MN     XU4$ )Nr   r5  )rV  r   get_read_write_buffers_sizesr   get_estimated_runtime)rv  total_bytesnode_countsnode_runtimesr   	num_bytess         r|   r  GraphLowering.count_bytes
  sy    
 NN((D99;I$K1n56  $(B(B(D!EF	 ) 66r~   zCallable[[str], None] | Nonesave_output_codec                p    [        SSSSS9   U R                  5       sS S S 5        $ ! , (       d  f       g = f)NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namer  dynamo_compile_column_us)r   _compile_to_moduler  s    r|   r  GraphLowering.compile_to_module.
  s4    -!"&%S	
 **,
 
 
s   '
5c                j   U R                   (       a  U R                  5       OU R                  5       u  p[        U[        5      (       a  U R                  U5      nO/[        U[        5      (       a  UnO[        S[        U5       35      eUR                  c   e[        UR                  5        [        R                  SUR                  5        [        R                  " SUR                  5        [        R                   (       a%  [#        SUR                   3[$        R&                  S9  [        U[        5      (       au  [(        R                  R+                  UR                  5        [(        R                  R-                  [.        R0                  R3                  UR                  5      S   S-   5        U$ )Nz Unrecognized wrapper code type: Output code written to: %szCompiled module path: )filer   .debug)rM  r  r  r   rd   _compile_to_module_linesr0   NotImplementedErrorr  __file__ru   r  r  rr   r  r+   benchmark_kernelprintr  stderrrf   output_coder  ospathsplitext)rv  r>  r  mods       r|   r,   GraphLowering._compile_to_module7
  s6    04/?/?D))+T\\^ 	 l$455//=C&;<<C%243E2FG  ||'''%		.=93<<H""*3<<.9

Kl$9::GG-GGLL))#,,7:XEF
r~   c                  ^^ SSK Jn  [        R                  R                  (       aw  U R
                  R                  R                  5       nUR                  SS5      nSU-   U R
                  R                  R                  5       -   S-   nUTR                  -   Tl
        [        R                  b  [        R                  TR                  5        [        R                  " STR                  5        [        R                   " 5       n["        R$                  " UTR                  S9   TR&                   VVs/ s H  u  pgXgR(                  4PM     nnnUR+                  TR                  5      u  n	m[        R                  " S	T5        [,        R                  R/                  T5        [,        R                  R1                  [2        R4                  R7                  T5      S
   S-   5        [9        SU4S jU4S jS9  [=        SSS9   UR?                  U	TU0 U R@                  EU RB                  EU RD                  ES9n
S S S 5        Xl#        TU l$        Xl%        [        RL                  (       a6  [        RN                  (       a!  W
RQ                  5       nU
RS                  USSS9  W
$ s  snnf ! [:         a    [9        SU4S jS9  e f = f! , (       d  f       N= f)Nr*   )PyCodeCachez"""z\"\"\"z&r"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%s)coder/  r   r1  inductor_output_codec                 H   > T [         R                  R                  T 5      S.$ )N)filename	file_path)r9  r:  abspath)r:  s   r|   rD  8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  s     $!#!6r~   c                    > T R                   $ rw   r  r  s   r|   rD  rF  
      <#5#5r~   )
payload_fnc                    > T R                   $ rw   r  r  s   r|   rD  rF  }
  rH  r~   zPyCodeCache.load_by_key_pathTr  )linemapattrs)timesrepeat)*	codecacher?  r+   r  r   r>  kernel_autotune_defsgetvaluereplacekernel_autotune_callsrH  r   r&  rr   r  rU   inductor_meta_from_configrV   begin_compileline_mapr  writerf   r8  r  r9  r:  r;  r   r  r   load_by_key_pathr/  r2  r3  r_  r`  ra  benchmark_harnessprofile_bandwidth_outputget_argsbenchmark_compiled_module)rv  r>  r?  rP  tuning_codeinductor_metaline_nor   rK  r   r<  rz   r:  s    `          @r|   r2  &GraphLowering._compile_to_module_linesV
  s    	+==11#'#4#4#I#I#R#R#T #7#?#?{#S 7&' ##99BBDE 	  "-|/A/A!AL))5**<+=+=>1<3E3EF&@@B**=|?Q?QR	 &2%:%:%:MG **+%:   $)),*<*<=IC!!">EGG%GGLL))$/2X=> & 6 8PTU..nn.. 44	 / 	C V $##(G(G<<>D))$a)B
[  	&5
 	  VUs+   %J( 4J"BJ( 7K"J( (K
Kc                   / n[         R                  " S5      n[         R                  " S5      nU H  n[        U[        R                  5      (       a+  UR                  U R                   S[        U5       35        MM  [        U[        R                  5      (       a+  UR                  U R                   S[        U5       35        M  UR                  UR                  5       5        M     U$ )Nr   _none_shape)
rp  rq  r   r,   NoneAsConstantBufferr   rL  rS  rD   r  )rv  r  namesshape_counternone_counterr   s         r|   _get_output_namesGraphLowering._get_output_names
  s    !* q)!D$ 7 788		{%\0B/CDED"":":;;		{&m1D0EFGT]]_- " r~   c                8    U R                  U R                  5      $ rw   )rh  r  r  s    r|   get_output_namesGraphLowering.get_output_names
  s    %%d&8&899r~   c                :   XR                   ;   =(       ar    U R                   U   R                  5       S:H  =(       aK    [        U R                   U   R                  5       5      S:H  =(       a    [	        U R                   U   5      S:H  =(       d    XR
                  ;   $ )Nr*   r   r  )r!  r  r   r  r?   r$  r  s     r|   is_unspec_argGraphLowering.is_unspec_arg
  s     %%% B!!$'113q8BD%%d+44671<B   1 1$ 78EA	3
 222	3r~   )`r  r  r]  r(  r)  ro  rg  rQ  rY  rW  rX  r  rl  r*  r+  rt  r_  ra  r`  r  r  r-  r  r5  r/  rM  rK  rZ  rB  ru  r&  rc  r=  r'  r%  rb  r  ri  r  rA  r  r.  rN  r1   r  rR  r   r!  r"  r  r<  r  rn  r  r  r  r  rC  rm  rP  r9  rE  rD  r  rL  rF  rI  rH  r0  r1  r;  rk  r\  r  r3  r,  re  r#  rs  rU  r  rO  r7  r8  r6  r  rV  r:  r4  r  r2  rj  r   rr  r>  r$  )NNNFFNNFFFNNNNNNFN)(rw  torch.fx.GraphModuler  zSequence[object] | Nonerx  zShapeEnv | NonerR  
int | NonerM  r  rQ  r  r  zbool | NonerA  z1Callable[[list[ir.ExternKernelNode]], Any] | Noner  r  r  r  r  r  r-  zdict[str, int] | Noner  
str | Noner  rr  r  zGraphLowering | NonerL  rr  r  zSequence[int] | NonerN  r  r  z3Callable[..., dict[Any, Callable[..., Any]]] | Noner   rN  )r   rN  )r  torch.Tensorr   z1tuple[Sequence[int | Expr], Sequence[int | Expr]])r  rs  r   z)tuple[list[sympy.Expr], list[sympy.Expr]])r   zLir.TensorBox | ir.StorageBox | ir.Buffer | WorkspaceArg | ir.TorchBindObjectr   zSequence[Expr])r   z-ir.Buffer | WorkspaceArg | ir.TorchBindObjectr   r	   )r
   z)torch._inductor.ir.IRNode | device | Noner  r.   r   r  )T)r  rp   r  r  r   r  )r   torch.device)r
   rt  r   Iterator[None]ra  )rw  rm   r  r  r   r  )rL  rS  r   rS  )rw  rp  r  zlist[torch.Tensor]r  rS  r   r  )r   zOrderedSet[Node])rL  rS  r   rN  )r
   rt  r   rN  )r   z,torch._subclasses.fake_tensor.FakeTensorMode)r  rS  r   z4ir.TensorBox | ir.Buffer | ir.TorchBindObject | None)r  
sympy.Exprr   rN  )r  rS  r   z-ir.TensorBox | ir.Buffer | ir.TorchBindObject)r  rS  r   ztorch.dtype)r  rS  r   z
int | Expr)rz   r   r   r   )r   zir.Operationr   rS  )r<  z	ir.Bufferr8  r  r   rS  )rA  	list[str]r   rS  )rO  rM  r   rN  )rL  rS  r   rs  )rL  rr  r  r   r   rS  rw   )r  r   rL  rr  r   rF   )rL  rS  ry  ztorch.device | Noner   rS  )r   rS  rz   ztuple[object]r{   dict[str, object]r   zExpr | TensorBox | None)r   rg   rz   r   r{   dict[str, Any]r   r   )r  rs  r   r  )r   rS  rz   z	tuple[()]r{   rx  r   zLConstant | TensorBox | ShapeAsConstantBuffer | ir.Subgraph | TorchBindObject)r   r   rz   r   r{   r   r   r   )r   ztorch.fx.node.Targetrz   z"tuple[torch.fx.node.Argument, ...]r{   rx  r   rN  )r   r   )r   ru  )r  r   r  
tuple[Any]r  ry  r  rz  r  ry  r   rN  )r  r   r   rq  )r  r   r   rN  )r  r   r   object)r  r   r  zOrderedSet[sympy.Symbol]r   rN  FNNN
r  r  r  rr  r  zPythonWrapperCodegen | Noner  zGraphPartitionSignature | Noner   rN  )r  r  r   rN  )r   z)tuple[ValueWithLineMap, ValueWithLineMap])r  r   r   rN  )r   zVtuple[int, list[tuple[BaseSchedulerNode, int]], list[tuple[BaseSchedulerNode, float]]])r   CompiledModule)r>  rd   r   r~  )r  r   r   rw  )r   rw  )rL  rS  r   r  )L__name__
__module____qualname____firstlineno____annotations__r  r~  r  r  r  r  r  r  r  
contextlibr   r  r  staticmethodr  r  r  r[  r  r  propertyr  r  r  r   r%  r  r0  r5  r=  rB  rP  rU  rY  rg  rl  rd  r~  typing_extensionsoverrider   r  r  r  r  r   r  r  r  r  r%  r-  rk  r  r  r  r  r  r  r  r  r  r&  r  r,  r2  rh  rk  rn  __static_attributes____classcell__r{  s   @r|   r   r   d  sD   ""
 37%)#!"&"!$48)-(,-104 MQ+TC TC 0TC #	TC
 TC TC TC  TC!TC TC TC TC 2TC 'TC  &!TC" +#TC$ %TC& .'TC( )TC* K+TC, 
-TC TCl1# # 	:# J	2## 
#*EAE	EM9M  M 
	M<:4 ( ( ^ ^@
 
 +
 	

 

6>@A
D  	=$FQQ	6Q8,
8& FK (
	
 :	
&&Rkk k "	k
 
!kZ ]8  ]8~ 5 54747 47 "	47
 
V47l j
$j
 1j
 "	j

 
j
  j
X  $ $ $ $I:I: I: #	I:
 I: #I: 
I:V 6 6#$]~U;U;3KU;	U;nQ "$(;??C#W#W "#W 9	#W
 =#W 
#WJ]0>]0	]0~w"	2w"r	8&%*7
7" 6:29->G,G	GR:3 3r~   r   c                  ^   ^  \ rS rSrSrSU 4S jjr    S         SU 4S jjjrSrU =r$ )	r  i
  z
Mostly a helper class for the subgraph lowering. The main goal is to call
init_wrapper_code with the subgraph related arguments.
c                2   > Xl         [        TU ]  " U0 UD6  g rw   )r  r
  r  )rv  r  rz   r{   r{  s       r|   r  SubgraphLowering.__init__
  s    $)&)r~   c                `   > [         TU ]  SU R                  U R                  R                  S9  g )NT)r  r  r  )r
  r  rL  r  r>  )rv  r  r  r  r  r{  s        r|   r  "SubgraphLowering.init_wrapper_code
  s.     	!)) $ 8 8 	" 	
r~   )r  )r  r   rz   r   r{   r   r   rN  r|  r}  )	r  r  r  r  __doc__r  r  r  r  r  s   @r|   r  r  
  sU    
* "$(;??C

 "
 9	

 =
 

 
r~   r  )rz   r   r{   r   r   rN  )r   rv  r   ztorch.dtype | None)r   r   r   r  )r   rm   r   rS  r   z,Tensor | torch._C.ScriptObject | GraphModule)r   rn   r   dict[Node, tuple[int, ...]])r   r  r   zdict[Node, object])r   rn   r   r  r   rN  )r   r%   r   r  )
__future__r   r  r   rp  loggingr  r9  r&  r  rJ  r  collectionsr   r   typingr   r   r   r   r	   r   torch._loggingtorch.fxr
   r   torch._decompr   torch._dynamo.utilsr   r   "torch._library.fake_class_registryr   torch._library.opaque_objectr   r   r   torch._library.utilsr   r   r   torch._prims_commonr   r   torch._subclasses.fake_tensorr   torch._utils_internalr   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   r#   r$   torch.fx.noder%   torch.fx.passes.reinplacer&   torch.utils._mode_utilsr'   torch.utils._ordered_setr(   torch.utils._sympy.numbersr)   r  r+   r,   r-   codegen.commonr.   r/   r0   r1   r2   r3   r4   r5   excr6   r7   r8   r9   fx_utilsr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   runtimerU   runtime.autotune_cacherV   r  rW   ru  rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   virtualizedre   rf   collections.abcrg   rh   ri   rj   typesrk   torch._higher_order_ops.effectsrl   rm   torch.fx.graphrn   codegen.wrapperro   dependenciesrp   rV  rq   r~  torch._inductor.codecacherr   	getLoggerr  r  _logginggetArtifactLoggerr  r   r   rq  rT  r@  )torch._inductor.fb.triton_kernel_metadatart   torch._inductor.fb.utilsru   r   r   r   r   r   r   r   r   r  r   r  rx   r~   r|   <module>r     s#   "      	 	 
   # % / /        , 4 ? 
 ; 7 5 : ? L	 	 	  1 / / - ! !	 	 	  %       $ 8 &    ( FF ;$$5!,"77N 5 !00<Hyy~~$??, 	 9(
	!1$50V/V/+FV/	V/r*S%3EHH(( S%3lJ
} 
r~   