
    3j                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJrJrJrJrJr  S SKJrJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJ r J!r!  S SK"J#r#J$r$J%r%J&r&J'r'J(r(  S S	K)J*r*  S SK+r+S S
K+J,r,J-r-J.r.  S SK/J0s  J1s  J2r3  S SK4J5s  J6r7  S SK8r9S SK:r9S SK;J6s  J<r=  S SK>J?r?  S SK@JArA  S SKBJCrC  S SKDJErE  S SKFJGrG  S SKHJIrI  S SKJJKrKJLrL  S SKMJNrNJOrOJPrPJQrQJRrRJSrS  S SKTJUrUJVrVJWrWJXrXJYrYJZrZJ[r[J\r\J]r]J^r^J_r_  S SK`Jara  S SKbJcrc  S SKdJere  S SKfJgrgJhrhJiriJjrj  S SKkJlrl  SSKmJnrnJoro  SSKpJqrqJrrrJsrsJtrtJuru  SSKoJvrvJwrwJxrxJyryJzrzJ{r{  SSK|J}r}  SSK~JrJrJrJr  SSKJr  SS KJrJr  SS!K6JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SS"KJrJrJr  \(       a+  S S#KTJr  S S$K`Jr  S S%KJr  SS&KJr  SS'KJr  SS(KJr  SS)K6Jr  O\rS*\S+'    S SKr\GR`                  rS,r\&" S.5      r\ " S/5      r\ " S05      r\ " S15      r\\,-  rS*\S2'   \\-  \,-  rS*\S3'   \9GRx                  GRz                  \9GRx                  GR|                  -  rS*\S4'   \GR                  " \5      r\R*                  " \	GR                  S5S69r\9GR>                  GR                  r \!\S7\\S74   S8S9\\!\\\S74   S7S8S94   S-     4   rS*\S:'   GS;S; jr\GR                  " S,S<9 " S= S>5      5       rGS<S? jrGS=S@ jrGS>SA jrGS>SB jr      GS?SC jr/ SDQr/ SEQr GS@     GSASF jjrGSBSG jr GS@     GSASH jjr\GSCGSDSI jj5       r\ GSC     GSESJ jj5       r GSC     GSFSK jjr    GSGSL jr    GSHSM jrGSISN jrGSISO jrGSJSP jr        GSKSQ jr      GSLSR jrGSMSS jrGSNST jrSU r " SV S95      r\" S-S<9 " SW SX5      5       r\ " SY SZ\5      5       rGSOS[ jr\ " S\ S]\5      5       r\ " S^ S_\5      5       r\" S`5      \" Sa5      \" Sb5      \" Sc5      \" Sd5      \" Sd5      \" Se5      Sf.rSg\Sh'    GSP       GSQSi jjr\ " Sj Sk\5      5       rS\-" S 5      4       GSRSl jjr\\\,   \\,   /\4   rS*\Sm'    " Sn So\5      r " Sp Sq\5      r " Sr Ss\5      r\ " St Su\5      5       r\ " Sv Sw\5      5       r\ " Sx Sy\5      5       rGSSSz jrGSSS{ jr     GST             GSUS| jjr      GSVS} jrGSWS~ jr\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r " S S\5      r\ " S S\5      5       r\ " S S\5      5       Gr \ " S S\5      5       Gr      GSXS jGrGSYS jGr " S S5      Gr\ " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S S\5      Gr	 " S SG\5      Gr
\ " S SG\5      5       Gr " S SG\5      Gr\" S-S<9 " S S\\r5      5       Gr\" S-S<9 " S SG\\5      5       Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr\ " S S\5      5       Gr\ " S S\5      5       Gr\" S-S<9 " S SG\5      5       Gr\GR                  " S,S<9 " S S5      5       Gr " S SG\5      Gr " S SG\5      Gr\\-  G\-  \-  G\\\-  \-  G\-     -  Gr " S S5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr  " S SG\5      Gr!    GSZS jGr"\" S-S<9 " S SG\5      5       Gr# " S SG\#5      Gr$ " S SG\$5      Gr%\" S-S<9 " S SG\#5      5       Gr&\" S-S<9 " S SG\&5      5       Gr' " S SG\'5      Gr( " S SG\&5      Gr) " S SG\5      Gr* " S SG\&5      Gr+ " S SG\+5      Gr, " S SG\+5      Gr- " S SG\&5      Gr. " S SG\&5      Gr/ " S SG\&5      Gr0 " S SG\&5      Gr1 " S SG\&5      Gr2 " S SG\25      Gr3 " S SG\)5      Gr4 " S SG\&5      Gr5 " S SG\&5      Gr6 " S SG\'5      Gr7 " S SG\&5      Gr8 " S SG\&5      Gr9 " S SG\&5      Gr: " GS  GSG\&5      Gr;\" S-S<9 " GS GS5      5       Gr< " GS GSG\)5      Gr=\" S-S<9 " GS GSG\=5      5       Gr> " GS GS	G\=5      Gr?\ " GS
 GSG\5      5       Gr@ " GS GSG\&5      GrA " GS GSG\A5      GrB " GS GSG\A5      GrCGS[GS jGrD " GS GSG\=5      GrE\GR                   " GS GS\5      5       GrF " GS S7G\F5      GrG " GS GSG\F5      GrH\" S-S<9 " GS GS\5      5       GrIGS\GS jGrJ\" S-S<9 " GS GSG\&5      5       GrK\" S-S<9 " GS GS G\&5      5       GrL    GS]GS! jGrM\" S-S<9 " GS" GS#G\&5      5       GrN " GS$ GS%G\=5      GrO " GS& GS'\5      GrP\ " GS( GS)G\P5      5       GrQ\ " GS* GS+G\P5      5       GrR\ " GS, GS-G\P5      5       GrS\ " GS. GS/G\P5      5       GrT " GS0 GS1G\=5      GrU " GS2 GS3G\U5      GrV " GS4 GS5G\U5      GrW " GS6 GS7G\U5      GrXGS^GS8 jGrYGS^GS9 jGrZGS_GS: jGr[g! \ a    SrS-r G	NJf = f(`      )annotationsN)Callable	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnycastClassVarLiteraloverloadSupportsFloatSupportsIntTYPE_CHECKING	TypeAliasTypeVarUnion)assert_neverNeveroverride	ParamSpecSelfTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)FakeScriptObject)get_opaque_obj_repris_opaque_value)compute_required_storage_lengthis_boolean_dtype(is_contiguous_for_memory_format_or_falseis_float_dtypemake_channels_last_strides_for
StrideType)&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsGuardOnDataDependentSymNodehas_free_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)_disable_current_modes)CleanDivFloorDivModModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesSymbolUsageCollectorOpsHandlervar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfcache_on_self_and_argsceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)SympyBoolean)Argument)IntLikeType)CUTLASSTemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   rw   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefix	TensorBoxr!   IRNode_NodeOrNodesc                .    [        U [        [        45      $ N)
isinstanceintr    xs    L/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w((    )frozenc                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   Srg)GraphPartitionSignature   OrderedSet[sympy.Symbol]symbol_inputsz0dict[str, IRNode | sympy.Expr | TorchBindObject]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraph	list[str]constant_names N__name__
__module____qualname____firstlineno____annotations____static_attributes__r   r   r   r   r      s/     ,+ BA (' r   r   c                &   ^ SU4S jjmT" U 5        g )Nc                  > U c  g [        U [        [        45      (       a  U  H  nT" U5        M     g [        U [        5      (       a   U R	                  5        H  nT" U5        M     g [        U [
        [        [        [        [        R                  R                  R                  [        [        [        [         ["        4
5      (       d   S[%        U 5       S35       eg )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr   r   EffectfulKernelShapeAsConstantBufferOpaqueMultiOutputtype)nodesnode_check_tensorboxs     r   r   %validate_ir.<locals>._check_tensorbox   s     =e}-- & t$$ & ' ! KK''//#)%   e%jk r   )r   _NodeOrNodes | NonereturnNoner   )node_or_nodesr   s    @r   validate_irr      s    > ]#r   c                b   ^  [        T [        5      (       d   [        T 5      5       eSU 4S jjnU$ )Nc                 0   > [        [        T5      " U 0 UD6$ r   )getattrrq   )argskwargsnames     r   fnops_wrapper.<locals>.fn  s    sD!42622r   )r   objectr   r   r   rr   )r   strr   )r   r   s   ` r   ops_wrapperr     s+    dC  ,$t*, 3 Ir   c           
     f   ^ [        [        U [        [        U 5      5      5      5      mSU4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   lenrange)indexi	inv_orders     r   reindex inverse_reorder.<locals>.reindex  sC    5zS^+++-23u:->?->il#->???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @r   inverse_reorderr     s*    Sc%j 123I@ Nr   c                   ^  SU 4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   r   )r   r   r   s     r   r   same_reorder.<locals>.reindex#  sB    5zSZ''').s5z):;):AeAh):;;;r   r   r   )r   r   s   ` r   same_reorderr   "  s    < Nr   c                   ^ ^ SU U4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   )r   reindex1reindex2s    r   r    fuse_reindexing.<locals>.reindex.  s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr   *  s    ) ) Nr   )   r      rE   )   r   r   r   rE   c                j    Ub  [        S U  5       5      (       a  [        U 5      nU$ [        X5      nU$ )z)
Convert strides to fill order (argsort)
c              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   r   r   r   r    .0ss     r   	<genexpr>!get_fill_order.<locals>.<genexpr>>  s#     QS
1sEMM.B C CS   -/)allr[   r\   )seq	shape_env
sorted_idxs      r   get_fill_orderr   8  s=     CQSQQQ$+CL
  !0
r   c                    [        U 5       VVs0 s H  u  pX!_M	     nnn[        [        U 5      5       Vs/ s H  oCU   PM	     nnU$ s  snnf s  snf )zx
Convert stride order to fill order
For channel last format,

stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
)	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   F  sR     (1'78'783ch'7F8%*3u:%67%6)%6J7 97s
   AAc                    [        X5      n[        [        U 5      5       Vs/ s H  nSPM     nn[        U5       H	  u  pVXTU'   M     U$ s  snf )z!
Convert strides to stride order
r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr  R  sL     !/s >JCHo
&o1oC
&Z(D	 )J 's   A
c                    g r   r   r   replace_symbols_with_hintss     r   ir_node_to_tensorr  _  s    RUr   c                    g r   r   r  s     r   r  r  c  s     r   c                   U c  g U(       a%  [         R                  R                  R                  nO[        nU R                  5        Vs/ s H
  o2" U5      PM     nn[        U 5      (       a0  U R                  5       R                   Vs/ s H
  o2" U5      PM     nnO[        R                  U5      nU R                  5       nU R                  5       n[        U5      n[        U5      n[         R                  R                  R                  R                  5          [         R"                  " XEXgS9R%                  5       nS S S 5        U$ s  snf s  snf ! , (       d  f       W$ = f)N)sizestridedtypedevice)rs   graphsizevarsoptimization_hintr"   get_sizeis_storage_and_layout
get_layoutr	  FlexibleLayoutcontiguous_strides	get_dtype
get_devicera   r   suppress_guardstorchempty_stridedzero_)	r   r  shape_fnr   r  r	  r
  r  ts	            r   r  r  i  s    	y "77##55!".AHQKD.Q'(||~'<'<='<!(1+'<=2248KKME\\^F"4(D$V,F	
			#	#	3	3	5E

%' 	
 
6 H / > 
6	5 Hs   	EE$E
E c                D    [        U [        5      (       a
  U (       d  S /$ U $ r   )r   r   values    r   may_convert_to_optionalr    s!     %u vLr   c                @   [        U [        5      (       d  U c  U $ [        U [        R                  5      (       a  U R                  $ [        U [
        [        45      (       a  [        U R                  5       5      $ [        SU  S[	        U 5      R                   S35        g )Nzget_device_type(: ))r   r   r  r  r   r   
OutputSpecget_device_typer  r   r   r   s    r   r$  r$    sz     !SQY	Au||	$	$vv	A
+	,	,q||~..#A3ba)9)9(:!<=r   c                    [        U 5      nUS;   a  [        [        U S35      S:X  a  ggUb  [        U5      =nc  gSSKJn  [        U[        5      (       d   [        U5      5       e[        X#5      $ )N)cpucudaxpu_backendtritonTFrE   )TritonScheduling)	r$  r   rF   rJ   codegen.tritonr+  r   r   
issubclass)r   r  device_schedulingr+  s       r   	is_tritonr/    sz    QF ''6fXX./8;!:6!BBK0'..G5F0GG.'::r   c                    [        U 5      S:H  $ )Nr&  )r$  r   s    r   is_cpur1    s    1&&r   c                j  ^ [        U [        5      (       aM  U R                  5       b<  [        U R	                  5       5      (       d  [        U R                  5       5      (       a  g[        R                  " U4S jU R	                  5       S S  5       6 n[        R                  " [        R                  " U R	                  5       S   S5      [        R                  " U R                  5       S   S5      5      n[        R                  " X#5      n[        R                  R                  R                  U5      $ )NFc              3  f   >#    U  H&  n[         R                  " [        UT5      S 5      v   M(     g7fr   N)r   EqrB   )r   r   	alignments     r   r   -is_aligned_realized_tensor.<locals>.<genexpr>  s(     	F2EQ%((3q)$a
(
(2Es   .1rE   )r   r   maybe_get_strider4   
get_strider  r   AndOrr5  Lers   r  r  guard_or_false)r   r6  aligned_stridesaligned_last_dim
is_aligneds    `   r   is_aligned_realized_tensorrB    s    q&!!' 00 ..ii	F!,,."2E	FO xx#Q'!**,r2BA)F ?=J 77**:66r   c                ~   [        U5      [        U 5      :X  a  [        U 5      [        U5      :X  d   e[        X U5       H{  u  p4n[        R                  R                  R                  US5      (       a  M7  [        R                  R                  R                  [        R                  " XE5      5      (       a  M{    g   g)zH
Returns true if the strides are equal, ignoring dimensions of size 1 .
rE   FT)	r   r   rs   r  r  statically_known_leqr>  r   r5  )strides1strides2shapedims1s2s         r   significant_strides_equalrK    s     u:X&3x=CM+III5H57700a88ww..uxx/?@@ 6 r   c                t   [        U 5      (       d  U $ [        S [        XR                  5       5       5       5      (       a  U $ [	        XR                  5       U R                  5       5      (       d  U $ [        U 5      u  p#/ UR                  Qn[        U R                  5       5       H<  u  pV[        R                  R                  R                  US5      (       d  M6  X   XE'   M>     [        UR                  UR                  UR                   UUR"                  UR$                  5      n['        [)        X'S95      $ )a  
Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
dimensions - size 0 or 1 - will be updated.

If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
c              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rs   r  r  statically_known_equalsr   rI  rJ  s      r   r   2try_match_insignificant_strides.<locals>.<genexpr>  s1      7FB 	
00887   8:rE   datalayout)r  r   r   r:  rK  r  as_storage_and_layoutr	  r   rs   r  r  rD  FixedLayoutr  r
  r  offset	is_pinnedr   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesra    s    !((
 '#4#4#67   $W.?.?.A6??CTUU/7G%:$$%J&//+,7700A66#JJM - J _'EFFr   c                    U R                   R                  SS9S   n[        UR                  5       VVs/ s H  u  p#UPM	     snnUR                  S'   SSKJn  U" U 5        g s  snnf )Noutputopr   user_visible_output_idxs)record_original_output_strides)r  
find_nodesr   r   metatorch._inductor.compile_fxrg  )gmoutput_noder   r   rg  s        r   gm_original_output_stridesrm    sd    ((%%%215K#K$4$454554K/0 J"2&4s   A#c                    [        5       nU  H9  nU[        UR                  5       SS9-  nU[        UR                  5       SS9-  nM;     [	        U5      $ )NFunbacked_only)r>   r'   r  r:  r   )inputssym_varsinps      r   get_symbolic_inputsrt    sP    !+H$S\\^5II$S^^%5UKK  >r   c                   [        U [        5      (       a  U R                  n [        U [        5      (       a  U R	                  5       n [        U [
        5      (       a  U R                  n [        U [        5      (       a  U R                  5       $ S $ r   )r   r   rT  BaseViewunwrap_view
StorageBoxBufferget_namer   s    r   try_get_namer{    sc    !YFF!XMMO!Z  FF%a001::<:d:r   c                  J   \ rS rSr% Sr\" 5       rS\S'   SrS\S'   \	R                  " SS	9rS
\S'   \	R                  " SS	9rS\S'   \	R                  " SS	9rS\S'   \	R                  " SS	9rS\S'   \	R                  " SS	9rS\S'   \\R$                  SRS j5       5       r\\R$                      SSS j5       5       r\STS j5       rSUS jrSVS jrSWS jrSXS jrSYS jrSZS jrS[S jrS\S jrSXS jrS]S^S  jjr S_       S`S! jjr SaS" jr!SbS# jr"ScS$ jr#SdS% jr$SeS& jr%SfS' jr&SgS( jr'ShS) jr(SiS* jr)\*SjS+ j5       r+SkS, jr,SgS- jr-SlS. jr.SmSnS/ jjr/SoS0 jr0SpS1 jr1SgS2 jr2SqS3 jr3SrS4 jr4SsS5 jr5SiS6 jr6StS7 jr7SlS8 jr8SgS9 jr9SmSuS: jjr:SvS; jr;SWS< jr<SUS= jr=SWS> jr> Sw     SxS? jjr?SyS@ jr@SzSA jrA Sw     S{SB jjrBS|SC jrCS}SD jrDS~SE jrESSF jrF Sw   SSG jjrGSlSH jrHShSI jrISgSJ jrJSgSK jrKSSL jrLSSM jrMStSN jrNSSO jrO\P(       a  \*SaSP j5       rQSQrRgSQrRg)r   i%  zBase class for all intermediate representation (IR) nodes in TorchInductor.

Note:
    This is an abstract base class. Most methods raise NotImplementedError
    and must be overridden by concrete subclasses.
zClassVar[OrderedSet[Any]]_current_originsNzClassVar[int | None]_current_stream_idxF)initOrderedSet[Any]originslist[str] | None	tracebacktorch.fx.Node | Noneorigin_nodedict[str, Any]r   
int | None
stream_idxc              #     #    [         R                  nX-  [         l         S v   U[         l        g ! U[         l        f = f7fr   )r   r}  )r  olds     r   current_originsIRNode.current_origins:  s4      %%"%-	*&)F#cF#s   A1 A>Ac              #     #    [         R                  nU [         l         S v   U[         l        g ! U[         l        f = f7fr   )r   r~  )r  r  s     r   current_stream_idxIRNode.current_stream_idxD  s2     
 ((%/"	-),F&F&s   ?/ ?<?c                L    [        U [        [        [        [        [
        45      $ r   )r   ComputedBufferInputsKernelInputBufferrZ  TemplateBuffer)r   s    r   is_realized_nodeIRNode.is_realized_nodeP  s&    	
 		
r   c                ,    [         R                  U 5      $ r   )r   createselfs    r   wrap_for_loweringIRNode.wrap_for_lowering]  s    %%r   c                0    [         R                  XU5        g r   )r   __setattr__)r  attrr  s      r   _post_init_setattrIRNode._post_init_setattr`  s     	4u-r   c                J   [        U R                  5      nU R                  SU5        U R                  S[        R                  (       a  [
        R                  " 5       OS 5        U R                  SS 5        U R                  S0 5        U R                  SU R                  5        g )Nr  r  r  r   r  )r>   r}  r  rF   debug_ir_tracebackr  format_stackr~  )r  r  s     r   __post_init__IRNode.__post_init__f  s    T223	73V5N5N//1TX	
 	t4r2d.F.FGr   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r   r   deps     r   r   (IRNode.get_read_names.<locals>.<genexpr>r       ?.>s((.>   r>   	get_readsr  s    r   get_read_namesIRNode.get_read_namesq      ?dnn.>???r   c                    U R                   $ r   )r  r  s    r   get_tracebackIRNode.get_tracebackt  s    ~~r   c                    U R                   $ r   r  r  s    r   get_origin_nodeIRNode.get_origin_nodew      r   c                    g r   r   r  s    r   get_defining_opIRNode.get_defining_opz      r   c                    / $ )z'Return subgraphs contained in this noder   r  s    r   get_subgraphsIRNode.get_subgraphs}      	r   c                   [        5       nU R                  n[        U [        5      (       a-  U R	                  5       nU R
                  (       a  [        U/5      nU H  n[        US5      (       a.  UR                  (       a  UR                  UR                  5        MB  [        R                  R                  R                  R                  S0 5      R                  UR                  / 5      n[        U[        5      (       d  M  U HQ  n[        R                  R                  R                   R                  US 5      nU(       d  M@  UR                  U5        MS     GM     U$ )Nstack_trace	postToPre)r>   r  r   ExternKernelr  r  hasattrr  addr  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)r  stack_tracesr  r  r   pre_grad_nodes	node_namer  s           r   get_stack_tracesIRNode.get_stack_traces  s    )3,,dL))..0K$k]3Dt]++0@0@  !1!12 OO))JJNN# c$))R(  ".$77!/I--QQUU%t  
 #{$((5 "0 , r   c                6   S[        U SS5       3nU(       a  [        U5      S:  a  US S  S3nU R                  5       (       d  U/$ / nU R                  5        H8  nUR                  S5        X4R	                  S5      -  nUR                  S	5        M:     U/U-   $ )
Nzorigins=r   @   =   z...zstack_traces = {
})r   r   r  appendsplit)r  shortenr  stack_trace_strr  s        r   common_reprIRNode.common_repr  s    WT9b9:;s7|b( "c*G$$&&9002K""#560066O""3' 3 y?**r   c                .   [        U5      [        U R                  U5      5      -   n[        [        [        U5      5      nU(       a5  [	        SR                  U5      5      n[        U 5      R                   SU S3$ [        U 5      R                   SU S3$ )Nz,
z(
z
)(r"  )r   r  mapr   indentjoinr   r   )r  linesr  	multiline	new_liness        r   
str_helperIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    U R                   $ r   r
  r  s    r   r  IRNode.get_dtype      zzr   c                D     U R                  5       $ ! [         a     g f = fr   )r  NotImplementedErrorr  s    r   maybe_get_dtypeIRNode.maybe_get_dtype  s&    	>>##" 		    
c                2    [        S[        U 5       S35      e)Nz#get_layout() is not implemented by !r  r   r  s    r   r  IRNode.get_layout  s    !$GT
|ST"UVVr   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_layoutIRNode.maybe_get_layout  &    	??$$" 		r  c                "    U R                  5       $ r   )r  r  s    r   get_output_specIRNode.get_output_spec  s      r   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_output_specIRNode.maybe_get_output_spec  s(    	''))" 		r  c                >    [        U R                  5       [        5      $ )z4True for single tensor output (excludes MultiOutput))r   r  Layoutr  s    r   has_tensor_outputIRNode.has_tensor_output  s    $446??r   c                2    [        S[        U 5       S35      e)Nz!get_size() is not implemented by r  r  r  s    r   r  IRNode.get_size  s    !$Ed4j\QR"STTr   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_sizeIRNode.maybe_get_size  %    	==?"" 		r  c                "    U R                  5       $ r   r  r  s    r   rG  IRNode.shape  s    }}r   c                4    [        U R                  5       5      $ r   )rn   r  r  s    r   	get_numelIRNode.get_numel  s    T]]_--r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ Nr   rs   r  r  statically_known_truer   r5  r  r  s    r   is_zero_elementsIRNode.is_zero_elements  0    ww55ehht~~?OQR6STTr   c                0    [        S[        U 5       35      e)a  
If the IRNode refers to data which has not been materialized (e.g.,
it is a Pointwise/Reduction that could potentially have more
compute fused into it), realize the IRNode into physical memory,
ending the possibility of fusing into it, but allowing, e.g., multiple
users to access the data without having to recompute.

Check StorageBox.realize for a particularly notable implementation.

TODO(ezyang): I think, in principle, every IRNode should have an
implementation of this, and most of the time no-op is OK, but you
really do have to audit each IRNode for this, so for now, raise
an error if it's not implemented.  Note that some code in graph.py
will catch this thrown error and suppress it with a warning.
zrealize NYI on r  r  s    r   realizeIRNode.realize  s      "ODJ<"@AAr   c                0    [        S[        U 5       35      e)Nzcodegen_reference NYI on r  r  writers     r   codegen_referenceIRNode.codegen_reference  s    !$=d4j\"JKKr   c                    g r   r   r  s    r   r  IRNode.get_device  r  r   c                0    U R                  5       nUc   eU$ r   )r  r  r  s     r   get_device_or_errorIRNode.get_device_or_error  s    "!!!r   c                    gNFr   r  s    r   has_exceeded_max_readsIRNode.has_exceeded_max_reads      r   c                >    [        [        U 5      R                  5      er   r  r   r   r  s    r   make_loaderIRNode.make_loader      !$t*"5"566r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   make_indexerIRNode.make_indexer  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   r:  IRNode.get_stride  r/  r   c                D     U R                  5       $ ! [         a     g f = fr   )r:  r  r  s    r   r9  IRNode.maybe_get_stride  r  r  c                >    [        [        U 5      R                  5      er   r,  r  s    r   rz  IRNode.get_name  r/  r   c                D     U R                  5       $ ! [         a     g f = fr   )rz  r  r  s    r   maybe_get_nameIRNode.maybe_get_name!  r
  r  c                z     U R                  5       [        R                  R                  ;   $ ! [         a     gf = fr'  )rz  rs   r  graph_inputsr  r  s    r   is_input_bufferIRNode.is_input_buffer'  s4    	==?agg&:&:::" 		s   *- 
::c                    gr'  r   r  	thresholds     r   has_large_inner_fnIRNode.has_large_inner_fn-  r*  r   c                    g r   r   r  userss     r   
mark_reuseIRNode.mark_reuse0      r   c                    g r   r   r  s    r   realize_hintIRNode.realize_hint3  rJ  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   rw  IRNode.unwrap_view6  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   freeze_layoutIRNode.freeze_layout9  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  r   allow_paddings      r   freeze_layout_with_stride_order&IRNode.freeze_layout_with_stride_order<       "$t*"5"566r   c                >    [        [        U 5      R                  5      er   r,  r  r   s     r   freeze_layout_with_fill_order$IRNode.freeze_layout_with_fill_orderA  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  r	  s     r   freeze_layout_with_same_order$IRNode.freeze_layout_with_same_orderD  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  exact_stridesrU  s      r    freeze_layout_with_exact_strides'IRNode.freeze_layout_with_exact_stridesG  rX  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_read_writesIRNode.get_read_writesL  r/  r   c                6    U R                  5       R                  $ r   rg  readsr  s    r   r  IRNode.get_readsO      ##%+++r   c                4    [        U R                  5       5      $ r   )r   r  r  s    r   	num_readsIRNode.num_readsR  s    4>>#$$r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_storage_numelIRNode.get_storage_numelU  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  rp  s     r   get_free_symbol_usesIRNode.get_free_symbol_usesX  rX  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_reduction_typeIRNode.get_reduction_type]  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_reduction_sizeIRNode.get_reduction_size`  r/  r   c                    gr'  r   r  s    r   	is_externIRNode.is_externc  r*  r   c                    gr'  r   r  s    r   is_no_opIRNode.is_no_opf  r*  r   c                >    [        [        U 5      R                  5      er   r,  r#  s     r   constant_to_deviceIRNode.constant_to_devicei  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_mutation_namesIRNode.get_mutation_namesl  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_operation_nameIRNode.get_operation_nameo  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   get_inputs_that_alias_output#IRNode.get_inputs_that_alias_outputr  r/  r   c                    g r   r   r  s    r   r
  IRNode.dtypew  s    (+r   r   )r  zOrderedSet[Node]r   Generator[None, None, None])r  r  r   r  r   r   r   r   r   r   )r  r   r  r   r   r   r   r   r   OrderedSet[str])r   r  r   r  r   zOperation | Noner   zlist[Subgraph]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   ztorch.dtype | Noner   r  )r   zLayout | Noner   r#  )r   zOutputSpec | Noner   r   r   Sequence[Expr])r   Sequence[_IntLike] | None)r   z)_IntLike | sympy.Rel | Sequence[_IntLike]r   r   r   
str | Noner   r  zIndentedBuffer | Noner   r   r   torch.device | Noner   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   rB  r  r   r   rG  r   r   r   Fr   Sequence[int]rU  r   r   r   r   r  r   r   r	  r  r   r   rc  r  rU  r   r   r   r   dependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   r   rp  r   r   r   r  r  r   r   r   r  )Sr   r   r   r   __doc__r>   r}  r   r~  dataclassesfieldr  r  r  r   r  staticmethod
contextlibcontextmanagerr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  propertyrG  r  r  r  r  r  r$  r(  r-  r1  r:  r9  rz  r:  r>  rC  rH  rL  rw  rQ  rV  r[  r_  rd  rg  r  ro  rr  rv  ry  r|  r  r  r  r  r  r  r   r
  r   r   r   r   r   r   %  s    3=,/>04-4  +00e<G_<"-"3"3"?I?(3(9(9u(EK%E"-"3"3"?K?(..E:J
:*  * --	$-  - 

 

&.	H@ B+  PT
5%
504
5HL
5	
5W!@U  .UB$L
777777 ;@7"7377	7
77 HM7/7@D7	7
7,%7 %*7!7	!7
777777 	+ 
+ r   c                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrS S jrS!S jrS"S jrS#S jrS$S jr S%   S&S jjrS'S jrSrg)(	Operationi{  c                     S U l         0 U l        g r   )operation_name_config_patchesr  s    r   r  Operation.__post_init__}  s    *./1r   c                    [         er   r  r  s    r   r  Operation.get_device      !!r   c                @    [        U S5      (       d   eU R                  $ Nr  )r  r  r  s    r   r  Operation.get_origin_node  s!    t]++++r   c                @    [        U S5      (       d   eU R                  $ )Nr  )r  r  r  s    r   get_originsOperation.get_origins  s    tY''''||r   c                @    [        U S5      (       d   eU R                  $ )Nr  )r  r  r  s    r   get_stream_idxOperation.get_stream_idx  s    t\****r   c                8    U R                   c   eU R                   $ r   )r  r  s    r   r  Operation.get_operation_name  s     ""..."""r   c                    U R                   $ )zHGet config patches for this operation (e.g., coordinate_descent_tuning).r  r  s    r   get_config_patchesOperation.get_config_patches  s    ###r   c                    Xl         g)z&Set config patches for this operation.Nr  )r  patchess     r   set_config_patchesOperation.set_config_patches  s    &r   c                    gr'  r   r  s    r   r  Operation.is_extern  r*  r   c                    gr'  r   r  s    r   r  Operation.is_no_op  r*  r   c                    [         er   r  r  s    r   rg  Operation.get_read_writes  r  r   c                &    XR                  5       ;   $ r   )r  r  r   s     r   
is_user_ofOperation.is_user_of  s    **,,,r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r  r  s     r   r   +Operation.get_read_names.<locals>.<genexpr>  r  r  r  r  s    r   r  Operation.get_read_names  r  r   c                6    U R                  5       R                  $ r   rj  r  s    r   r  Operation.get_reads  rm  r   c                    [         er   r  r  s    r   get_outputsOperation.get_outputs  r  r   c                    [        5       $ r   r=   r  s    r   get_unbacked_symbol_defs"Operation.get_unbacked_symbol_defs  
    |r   c                    [        5       $ )a  
When unbacked_only=True:
Returns the unbacked symbols which are required to be in scope in
order to successfully perform codegen for this buffer.  For example,
a buffer that corresponds to an extern kernel call that takes i0 as
an argument would return {i0} here.  This is used to generate necessary
dependencies that ensure we actually bind i0 in codegen before you
try to use it.

Note that this is NOT transitive; in particular, if this buffer takes
in as input another buffer with dynamic shape (e.g., (i0,)), we will
not report it here, because you will already have a dependency
on that buffer, which will eventually have a dependency on i0 if
necessary.

When unbacked_only=False:
Similar to `unbacked_only=True` but including all free symbols
instead of only free unbacked symbols.
r=   ru  s     r   rv  Operation.get_free_symbol_uses  s    , |r   c                    g)z
Gets extra global memory size needed by this buffer.
Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
r   r   r  s    r   get_workspace_sizeOperation.get_workspace_size  s    
 r   )r  r  Nr  r  r  )r   r  )r   r  r  )r   r  )r  r  r   r   r  r  )r   r   r   r   r  r  r   list[Buffer]r   r   r  r  r  )r   r   r   r   r  r  r  r  r  r  r  r  r  r  rg  r  r  r  r  r  rv  r  r   r   r   r   r  r  {  sr    2" #$'"-@," %*!	!0r   r  c                     \ rS rSr% S\S'   S\S'   S\S'   S\S	'   \" S 5       S!   S"S
 jj5       rS#S jrS$S jr\r	S%S jr
S&S jrS'S jrS'S jr\S(S j5       r\\R$                  4S)S jj5       r\S*S j5       rS+S jr\S$S j5       rS,S-S jjrS!S.S jjrS/S jrS0S jrS1S jrS2S jrS'S jrS3S jrS4S jr S r!g)5Loopsi  r  r  r  r
  Callable[..., Any]inner_fnr  rangesc                   ^ [        5       R                  " / U4S jU R                   5       QU R                  T5      P76 $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r   erp  s     r   r   -Loops.get_free_symbol_uses.<locals>.<genexpr>  s     F+Qq-00+   )r>   unionr  inner_fn_free_symbolsru  s    `r   rv  Loops.get_free_symbol_uses  s>     |!! 
F$++F
&&}5
 	
r   c                   U R                  SU R                  R                   S3[        U R                  5      U R                  5       /U Vs/ s H  o" S[        X5       3PM     sn-   SU R                  < 3/-   5      $ s  snf )N'=origin_node=)r  r  r   r   r
  inner_fn_strr   r  )r  namesr   s      r   _to_strLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA54q,-.5AB d..1234
 	
 Bs   B
c                $    U R                  S5      $ Nr  r  r  s    r   __str__Loops.__str__      ||K((r   c                    U R                   $ r   r  r  s    r   r  Loops.get_device      {{r   c                    U R                   $ r   r  r  s    r   r  Loops.get_origin_node  r  r   c                    U R                   $ r   r  r  s    r   r  Loops.get_size  r'  r   c                    U R                   $ r   r  r  s    r   get_pointwise_sizeLoops.get_pointwise_size  r'  r   c                    UR                  SS 5      nUR                  SS 5      nU " U0 UD6nUR                  SU5        UR                  SU=(       d    UR                  5        [        R	                  U5      $ )Nr  r  )popr  r  r   r  )clsr   r   r  tbrs         r   r  Loops.create   sm    jj5ZZT*   	
]K8	["*;<""r   c                    [        U 5       VVs/ s H0  u  p#US:X  a  [        R                  R                  O
[	        X5      PM2     snn$ s  snnf NrE   )r   r   SZerorm   )r  r   nr   s       r   _indexLoops._index  sI     "&)
) FEGGLL(Fv(QQ)
 	
 
s   7A
c                |   [        [        R                  " 5       5      n[        R                  " U5         [        R
                  " [        SS5         U R                  " U R                  5       6   UR                  5       sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = fNallow_indexingT)
rT   rs   MockHandlerset_ops_handlerr   r   r  r
  inner_fn_argsgetvalue)r  	opcounters     r   inner_fn_opcountLoops.inner_fn_opcount  sy     1	i(LL)94@MM4--/0%%' A@ )(@@@ )((s#   B--B?	B-
B 	B--
B;c                :    U R                  U R                  5      4$ r   )r:  r  r  s    r   rA  Loops.inner_fn_args  s    DKK(**r   c                t    [         R                  R                  " U R                  /U R	                  5       Q76 $ r   )rs   KernelFormatterHandlerir_to_stringr
  rA  r  s    r   r  Loops.inner_fn_str   s3    ''44MM
 ..0
 	
r   Nc                z    Uc  Sn[        U[        R                  5      nU R                  5       R                  U:  $ r  )maxrF   realize_opcount_thresholdrD  num_opsrA  s     r   rC  Loops.has_large_inner_fn&  s9    I	6#C#CD	$$&..::r   c                `    U R                  U R                  5      n[        U R                  X!S9$ Nro  )r:  r  rN   r
  )r  rp  r   s      r   r  Loops.inner_fn_free_symbols,  s%    DKK(#DMM5VVr   c                    U R                  U R                  5      n[        U5      n[        R                  " U5         U R                  U5        S S S 5        UR                  $ ! , (       d  f       UR                  $ = fr   )r:  r  rQ   rs   r@  r
  usages)r  symbolr   handlers       r   collect_inner_fn_symbol_usage#Loops.collect_inner_fn_symbol_usage1  s^    DKK(08g&MM%  ' ~~ '& ~~s   A##
A<c                   [         R                  " [        SS5         U R                  5       (       aJ  [	        U R                  5       U R                  5       U R                  5       5      R                  sS S S 5        $ [	        U R                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr=  )	r   r   r  ry  rP   r-  r  r|  rk  r  s    r   r  Loops.get_reads:  s    \\.*:DA&&((*$$&MMO++- % BA +$$&MMO % BAAs   AB8=1B88
Cc                H    [        U R                  5       R                  5      $ r   )r>   rD  read_buffersr  s    r   r  Loops.get_read_namesH  s    $//1>>??r   c                H    [        U R                  5       R                  5      $ r   )r   rD  r]  r  s    r   ro  Loops.num_readsK  s    4((*7788r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_size() is not implemented by r  r  r  s    r   r|  Loops.get_reduction_sizeN      !9$t*QG
 	
r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_type() is not implemented by r  r  r  s    r   ry  Loops.get_reduction_typeS  rc  r   c                2    [        S[        U 5       S35      e)Nz+constant_to_device() is not implemented by r  r  r#  s     r   r  Loops.constant_to_deviceX  rc  r   r   r  r  )r  r  r   r   r  r  r  r  )r   r   r   r   r   r   )r  r  r   rD   r   r  )r   rU   r   zSequence[Sequence[_IntLike]]r   r  rp  r   r   OrderedSet[Symbol])rV  r!   r   r  r  r  r  r  r  )"r   r   r   r   r   r^   rv  r  r!  __repr__r  r  r  r-  classmethodr  r  rD   INDEXr:  r]   rD  rA  r  rC  r  rX  r  r  ro  r|  ry  r  r   r   r   r   r  r    s      G$$)
!
	!
 %
	
) H  	# 	# :>** 
 
 ( (+ 
 

;W
@9




r   r  c                   UR                   (       a   [        R                  " [        S5      U5      $ [        R                  " SU5      $ )Nnanr   )is_floating_pointrq   constantfloat)r   r
  s     r   nop_loader_fnrs  ^  s1    ||E%L%00||Au%%r   c                  d    \ rS rSrS
S jrSS jr\rSS jrSS jr        SS jr	SS jr
Srg	)	Pointwiseie  c                t    U R                  5       (       a  [        [        U R                  S9$ U R                  $ Nr  )r  r   rs  r
  r
  r  s    r   r-  Pointwise.make_loaderg  s,      ""=

;;}}r   c                $    U R                  S5      $ r  r   r  s    r   r!  Pointwise.__str__n  r#  r   c                    / $ r   r   r  s    r   r|  Pointwise.get_reduction_sizes  s    	r   c                    g r   r   r  s    r   ry  Pointwise.get_reduction_typev  r  r   c                |    U R                  5       n[        R                  " U=(       d    SU" U5      U" U5      5      $ Nunnamed)r-  rq   storer  output_nameindexervarsloaders        r   store_outputPointwise.store_outputy  s2     !!#yy1	74=&,OOr   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  S9$ FMove this to a given device. Requires that all reads are to constants.override_devicer  r
  r
  r  )r-  r   r   ConstantBufferru  r
  r  r  r  r  s      r   r  Pointwise.constant_to_device  sI    !!#n.?HP**;;	
 	
r   r   Nr  r  )r   zSequence[sympy.Expr]r  )r  r  r  !Callable[[Sequence[Expr]], Never]r  r  r   r   r  )r   r   r   r   r-  r!  rk  r|  ry  r  r  r   r   r   r   ru  ru  e  sR    ) HPP 3P 	P
 
P	
r   ru  c                  R    \ rS rSr% S\S'   SrS\S'   S
S jr        SS jrS	rg)Scatteri  r  output_indexerNrW   scatter_modec           	         U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  S9$ )r  r  )r  r
  r
  r  r  r  )	r-  r   r   r  r  r
  r  r  r  r  s      r   r  Scatter.constant_to_device  s[    !!#n.?HP**;;..**
 	
r   c                    U R                  5       nUc  Sn[        R                  " UU" U R                  U5      5      U" U5      U R                  S9$ )Nr  )mode)r-  rq   r  r  r  r  s        r   r  Scatter.store_output  sT     !!##KyyD''-.4L""	
 	
r   r   r  )r  r  r  r  r  r  r   r   )	r   r   r   r   r   r  r  r  r   r   r   r   r  r    sB    44"L)"


 3
 	

 

r   r  
logical_ormaximumminimummulr  bitwise_xor)anyrM  minprodsumdotxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                   ^ ^^ T [         ;   a	  [         T    $ T S;   a        SUUU 4S jjnU$ T S:X  a        SS jnU$ [        ST  35      e)Nargmaxargminc                  > U u  p#Uu  pETS:X  a  [         R                  " X$5      nO[         R                  " X$5      n[         R                  " X$5      n[	        T5      (       a  [         R
                  " X"5      n[         R
                  " XD5      n	[         R                  " U[         R                  " X5      5      n[         R                  " U[         R                  " X5      5      nT(       a  [         R                  " X55      O[         R                  " X55      n
[         R                  " U[         R                  " Xz5      5      n[         R                  " XbU5      [         R                  " XcU5      4$ )Nr  )	rq   ltgteqr.   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr
  reduction_types              r   argmax_combine_fn3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !G G)vvg/vvg/FF7,Ee$$&&2&&2~~dCFF7,DEucoog.OP ' w(VVG- 
 >>$(CDD		$1		$1 r   welford_combinec                \    U u  p#nUu  pVnXR-
  nXG-   n	Xy-  n
X(U
-  -   X6-   X-  U-  U
-  -   U	4$ r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fn4get_reduction_combine_fn.<locals>.welford_combine_fn  s]     &'"F(%&"F(OE!,J -I**emh6BB r   zunknown reduction_type=)r  tuple[object, object]r  r  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )r  r  )r  r
  r  r  r  s   ```  r   get_reduction_combine_fnr    s     --#N33	/	/	$	)>	&	 	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c                  J  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S!S
 jr\r\" S 5      S"S#U 4S jjj5       rS$S jr	S%S jr
          S&S jrS'S jrS(S jrS"S#S jjrS)S jr\ S*                   S+S jj5       r\          S,S j5       r\\R*                  S4                   S-S jj5       r\      S.S j5       r\      S.S j5       r\        S/S j5       r\      S0S j5       r\ S*               S1S jj5       r\            S2S j5       r\                        S3S j5       r\ S*                     S4S jj5       r\                      S5S j5       rS r U =r!$ )6	Reductioni  r  reduction_rangesrV   r  r  	src_dtyperZ   reduction_hintc                $    U R                  S5      $ )N)r  r  r  r   r  s    r   r!  Reduction.__str__  s    ||LMMr   c                |   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r  s     r   r   1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s     P:OQq-00:Or  )superrv  r>   r  r  r  rp  	__class__s    `r   rv  Reduction.get_free_symbol_uses  s7    w+M:Z\=O=OP$:O:OP>
 
 	
r   c                    U R                   $ r   )r  r  s    r   r|  Reduction.get_reduction_size  s    $$$r   c                    U R                   $ r   )r  r  s    r   ry  Reduction.get_reduction_type      """r   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      n[         R                  " U=(       d    SU" U5      U5        g r  )rq   	reductionr
  r  r  r
  store_reduction)r  r  r  r  reduction_varsr  s         r   r  Reduction.store_reduction  sR     JJNNMM$/	
 	K49gdmUKr   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   index_lengthReduction.index_length#  s!    4;;#d&;&;"<<<r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nX4$ r   )r:  r  r  rD   R0_INDEX)r  r   rindexs      r   rA  Reduction.inner_fn_args&  s6    DKK(T22DMMBr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      n[        U R                  X#US9$ rR  )r:  r  r  rD   r  rN   r
  )r  rp  r   r  s       r   r  Reduction.inner_fn_free_symbols+  sF    DKK(T22DMMB#MM5
 	
r   c                   U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  U R                  [        R                  S9$ )r  r  r  r
  r
  r  r  r  r  r  )r-  r   r   r  r  r
  r  r  r  r  rZ   DEFAULTr  s      r   r  Reduction.constant_to_device2  sk    !!#n.?HP**;;!22..nn(00	
 		
r   Nc	                   U[        U5      /n	[        R                  R                  R	                  U	5      (       d  [
        R                  S4$ [        R                  R                  R                  U5      n
[        R                  R                  R                  [        U5      5      nUS:H  =(       dV    [        R                  R                  U [        R                  5      (       + =(       a    US;  =(       a    [        R                  nUS:X  a  [
        R                  S4$ [        R                  " U 5      nUR                  nSnU(       a]  [         R"                  " [        R$                  R&                  U SS9n[         R"                  " [        R$                  R&                  U SS9nO      SS	 jnUnUS:X  a  U" X5      nUS:X  a  [
        R(                  U4$ Ub  [+        U[,        5      (       a  [.        R0                  " [2        S
S5         [5        U5      u  nnS S S 5        Wbj  Wbg  [        R                  R                  R7                  [        UU-   5      5      nU
U:X  a,  [8        R;                  SUUUUU5        [
        R(                  S4$ [
        R(                  U4$ X::  d  XS-  S-  :  a  [
        R                  S4$ [=        U UUUUUS:w  a  UOSU[
        R                  S9nSS jnU" U5      u  nnU(       a  U" U5      u  nn[?        U5      S:X  a  [
        R                  S4$ [@        RB                  " URE                  5       URG                  5       5      u  u  nnnSnSnU H  n[        R                  R                  RI                  UU5      n [        R                  R                  RK                  U U[M        URO                  5       5      5      n![Q        S U! 5       5      n"U"(       a  US-  nM  US-  nM     UU:  a  [
        R(                  U" X5      4$ [
        RR                  U" X5      4$ ! , (       d  f       GN&= f)NrE   scanr  r      T)inner_reductionFc                    gr6  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splits4Reduction.num_splits.<locals>.inner_reduction_splitso  s     r   r>  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr8  r   r  r  c           	     `  ^	 U R                  5       nUc   e[        S [        UU R                  5       U R	                  5       S9U S9nUR                  5       nUR                  c   eUR                   V s/ s H=  n [        U [        5      (       d  M  [        U [        R                  5      (       a  M;  U PM?     nn / nSn[        UR                  S S9 H  m	[        U	4S jU 5       5      (       d  M  UR                  T	R                  5        T	R                   ["        R$                  R&                  ;   d  Md  ["        R$                  R&                  T	R                      n[)        UR*                  SS 5      nUR-                  5         [)        UR*                  SS 5      U:w  d  M  SnM     XV4$ s  sn f )	Nr  r
  r  r   rU  rT  Fc                    U R                   $ r   r  r   s    r   <lambda>@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s    affr   keyc              3  T   >#    U  H  oTR                   R                  ;   v   M     g 7fr   )r   r3   )r   r3  mds     r   r   AReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s     F:aBHH111:   %(r	  T)r  r  r  r  r  rg  
range_varsr   r   r   Numbersortedrk  r   r  r   r   rs   r  name_to_bufferr   rU  decide_layout)
r3  r  cbread_writesr  indiceschangedbuforiginal_strider
  s
            @r   get_read_indices.Reduction.num_splits.<locals>.get_read_indices  sn   \\^F%%%%!++-
 B ,,.K ))555 %///Aa& /9!U\\/J /  
 GG[..4DEF:FFFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G F ##!s   4F+F+2F+r   c              3  B   #    U  H  oS :H  =(       d    US:  v   M     g7f)r   rE   Nr   r   s     r   r   'Reduction.num_splits.<locals>.<genexpr>  s     9AQ!a%   )r  r   r  r   r   r   )r3  r  r   ztuple[Sequence[Expr], bool])*rn   rs   r  r  all_unbacked_explicitly_hintedrZ   r  r  has_featurerH   REDUCE_TO_SINGLE_ELEMENTrF   split_reductionsrY   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r   r   r  rO   !replace_backed_symbols_with_hintslogr  r  r   rG   index_vars_squeezer  r|  simplify_with_rangesstride_hintsr   keysr   OUTER)#r  	dst_dtyper  r
  r  r  r  reduction_numel
input_nodeexprsr  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr3  r  r  r  r   r  ranges1	num_outer	num_innerr   jr\  outers#                                      r   
num_splitsReduction.num_splitsA  s    !-"78ww>>uEE ((!++ ww//AA/RWW%%77f8MN
%/ 
##FN,S,STT (( '' 	 U" ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K*K\\.2BDI <JG", J
 ).B.N((JJ)*7K*KL )
 ,/CC		G #,!&0	  -22B66 &&-- ;aZ"_, ((!++--;v-E>5(00	
!	$F ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		A  55aAAgg&&33>4#7G
 999EQ	Q	  y  &&(>$)   !&&(>$)  Y JIs   Q  
Qc                  ^ ^^^^^ [         R                  R                  R                  T5      m[	        X#5      mSUUU4S jjmUS;   a4  [        T[        R                  T5      5      m      SUU 4S jjmU4S j$ T mT$ )z1Convert inner_fn from a reduction to an pointwisec                   >^  [         R                  " TU U4S j[        R                  " T Vs/ s H  n[	        U5      PM     sn6  5       5      $ s  snf )Nc              3  6   >#    U  H  nT" TU5      v   M     g 7fr   r   )r   r  r   value_fns     r   r   =Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s&      # UF++#s   )r"  reduce	itertoolsproductr   )r   r   
combine_fnr  rC  s   ` r   r   *Reduction._unroll_reduction_fn.<locals>.fn  sN    ##"+"3"3,<=,<q%(,<=# 
 >s   Ar  r  c                   > U Vs/ s H  n[         R                  " U5      PM     nnT" X5      [        R                  " T" U5      [        R
                  5      4$ s  snf r   )r   expandrq   
index_exprr  int64)r   r  r   flatten_indexr
  s      r   rC  0Reduction._unroll_reduction_fn.<locals>.value_fn  sO     4::6a%,,q/6:U+NN=#8%++F  ;s    Ac                   > T" U 5      S   $ r6  r   )r   r   s    r   r  0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1r   )r   r  r   r   )r   r  r  r  r   r  )rs   r  r  guard_int_seqr  _fixed_indexerr  r  )r
  r  r  r  rH  rO  r   rC  s   ``  @@@@r   _unroll_reduction_fnReduction._unroll_reduction_fn  s     77++99:JK-nH
		 		 11* 112BCM
)3E*  .-HIr   c
                $  ^^^^^^ [         R                  R                  R                  [	        T5      5      mTS:X  a`  SU4S jjn
U
" S5      U
" S5      U
" S5      U
" S5      S.mTT;   d
   T S35       eSUUU4S jjn[
        R                  UUU[        U5      S9$ TS:X  a-  TS;   a	  SU4S	 jjnO	SUU4S
 jjn[
        R                  UTXS9$ [        T[        5      (       at  [        T5      [        R                  :  aW  [	        U5      S:w  d  [        UR                  5      (       a.  TS:w  a(  [
        R                  UTU R                  TTTU5      US9$ U R!                  UTUTUTTTU	5	      u  pSU4S jjnU" U5      nU["        R$                  :X  a  UnUS:X  a\  U	c   e[&        R(                  " [*        SS5         [-        U	5      u  nnSSS5        Wc   eWc   eU R/                  UTUTUTUUTU5
      $ US:  a  U R1                  UTUTUTTUUU	5
      nSn[        R2                  R4                  (       a&  [        U[6        5      (       a      SS jnU" U5      nU(       am  [        UR8                  [:        5      (       d   [        UR8                  5       5       eUR8                  R<                  S   Ul        TUl         UUl!        TUl"        U$ [6        R                  [;        UTTUTTUUS95      nU$ ! , (       d  f       GN5= f)za
Create a reduction node. May split the reduction to multiple layers to expose
more parallelism.
r   c                $  > T[         R                  :X  a  [        U 5      $ TR                  (       a0  [        U [        5      (       d   [        U 5      5       e[        U 5      $ [        U [        5      (       d   [        U 5      5       e[        U 5      $ r   )	r  r   rp  r   r   r   rr  r   r   )valr-  s    r   py_cnst!Reduction.create.<locals>.py_cnst2  sm    

*9$00%c=99D49D9 :%%c;77BcB7s8Or   rE   )r  r  r  r  z* not supported for zero-dimension tensors!c                8   > [         R                  " TT   T5      $ r   rq   rq  )r   r-  r  rtypes_to_initss    r   const_fn"Reduction.create.<locals>.const_fnH  s    ||ON$CYOOr   r  rJ  c                2   > [         R                  " ST5      $ r  r]  )r   r-  s    r   r   Reduction.create.<locals>.fnV  s    <<955r   c                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r   r7  r8  )r   r   reduction_indexr
  r  s      r   r   rb  [  s2    =M&N=Muww||=MO&N#E;; 'O   $4r  c                l   > [        T5      (       a  U $ U S:  a  [        U [        R                  5      $ U $ r6  )r   rM  rF   min_num_split)r  r.  s    r   _maybe_increase_split/Reduction.create.<locals>._maybe_increase_split  s2    /**qy5&"6"677r   r8  Nr>  Tc                L   U R                  5       n[        U5      S:w  a  g [        [        U5      5      nU[        R
                  R                  ;  a  g [        R
                  R                  U   n[        U[        5      (       d  g UR                  R                  5       c   eU$ r6  )r  r   nextiterrs   r  r  r   r  rT  ry  )cur_node
read_namesbufnamer  s       r   _find_split_reduction/Reduction.create.<locals>._find_split_reduction  s     "*!8!8!:J:!+#"4
#34Gagg&<&<<#''009C%c>::#88668DDDJr   r  )rY  r   r   zbool | float | int)r   r   r   rr   )r  r   r   r   )rn  r   r   zComputedBuffer | None)#rs   r  r  simplifyrn   ru  r  r   r   r    r   rF   unroll_reductions_thresholdrj   r   rU  r>  rZ   r  r   r   r  rO   !create_multilayer_existing_rangescreate_multilayerr*  mix_order_reductionr   rT  r  r  _split_size_original_inner_fn_original_ranges_original_reduction_ranges)r1  r  r-  r  r
  r  r  r  r  r/  rZ  r_  r   hintr  ri  r6  r7  r   split_reductionrq  r.  r^  s     ` ` ``             @@r   r  Reduction.create  s~   $ ''**33MBR4STa$ qz"1:
qz	O "_4 !""LM4P P ##!F|	 $   a!556 6
< < ##Y $  
 00O$v'I'IIv&!+vfkk/B/B%' ##11.	  $   nn

	 &e,
 ]222!NB;)))n.>E3V40
0 F )))'33388 $  QY'' C #O}}00ZY5O5O'*$ #8"< "/"6"6	BB O0012B />.B.B.S.STU.V+5=2390=M:J!!1-#-	
 
k FEs   L  
Lc           
        U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R
                  $ U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R                  $ [        U5      (       a  SOSn[        U5      (       a  SOSnUUUUUX"U4X"U4[        S5      U4S	.U    $ )
N)rM  r  z-infF)r  r  infTr   rE   )r  r  r  r  r  welford_reducer  online_softmax_reduce)r.   rr  r,   r  iinfor  rM  )r  r
  zeroones       r   default_accumulatorReduction.default_accumulator  s     ..e$$V}$!%(({{5)---..e$$U|#!%(({{5)---(//uQ&u--d1#40 $D1&+FmT%:	
 	 		r   c                :    U S:X  a  g[         R                  X5      $ )Nr  r   )r  r  r  r
  s     r   default_valueReduction.default_value  s!     --,,^CCr   c                    U S:X  a  U$ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U$ )Nr8     i      )rZ   r,  
OUTER_TINY)r  r  r  s      r   _multilayer_second_step_hint&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                   Uc  g[         R                  R                  R                  UR	                  5       U5      (       d  gUR                  5          [        U5        UR                  5       n[        USS 5       H8  u  pE[         R                  R                  R                  US5      (       d  M6  Us  $    g! [         a     gf = f)z
If we are reducing over the full tensor, and it is non-dense in the last dimension,
reindex so we reduce over the dense dimension. initially just handle complete
reduction case
Nr8  rE   )
rs   r  r  rO  r  r  rV  r  r:  r   )r1  r.  r/  r\  r   r   s         r   $check_for_split_dense_dim_reindexing.Reduction.check_for_split_dense_dim_reindexing%  s     ww77  "O
 
 	!*- '')gcrl+DAww771== ,  # 		s   B: :
CCc                &  ^^^^^
^ U R                  TU5      n[        R                  UT/U5      m[        R                  R
                  R                  [        R                  " [        TU5      S5      5      (       + m
      SUUUU
UU4S jjn	U	$ )Nr   c                "  >^^ Uu  nU Gt mnTU-  U-   mSUU
UU4S jjnT(       ac  [        T5      n[        R                  " [        R                  " TU5      [        R                  " TU5      5      n[        R                  " XST	5      $ U" 5       $ )Nc                 $   > T" TT" T /5      5      $ r   r   )r  r  	new_indexr   s   r   bodyCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.bodya  s    i');<<r   )r   rr   )rd   rq   r  rM  masked)r   re  reduction_blockr  index_dtyper  r  r  
block_sizedefaultr  	need_maskr.  r   s         @@r   
wrapper_fn5Reduction._multilayer_wrap_loader.<locals>.wrapper_fnZ  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vr   )r   Sequence[Symbol]re  r  r   rr   )
r  Viewdynamic_reshape_indexerrs   r  r  r  r   r5  rB   )r1  r  r  r.  r  r  r  r/  dense_indexr  r  r   s    ` ` ``   @@r   _multilayer_wrap_loader!Reduction._multilayer_wrap_loaderE  s     >>Z
 ../
 ((>>HHS%0!4
 
		#	6F		 	( r   c                   ^^^ [        S T 5       5      (       d   ST< 35       e[        R                  U[        U5      [        U5      -   5      m      SUUU4S jjnU$ )Nc              3  *   #    U  H	  oS :H  v   M     g7f)rE   Nr   r   r3  s     r   r   DReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>y  s     3?a6?   z8Only enabled for numel_hint == 1, found original_ranges=c           	        > U S [        T5       nU [        T5      S  nT" UT" [        U5      [        U5      -   5      5      $ r   )r   r   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       r   r  EReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )r  r  r  r  r   rr   )r   r  r  r   )r1  r  r  original_reduction_rangesr6  r7  r  r   s    ``    @r   '_multilayer_wrap_loader_existing_ranges1Reduction._multilayer_wrap_loader_existing_rangesp  s     3?333 	
G6HI	
3 ..%uZ'85AU;V'V
		(		!/		 		 		 r   c                  ^ U[         R                  [         R                  4;  a  UO[         R                  n[        R                  UUUUUUU	U5      nUR                  5         UR                  5       m      SU4S jjn[        R                  R                  R                  [        U5      5      nU R                  XU5      nXWS[        U5       :X  d   e[        R                  [	        UUUUU[        U5      S U	UUS95      $ )I
Break a large reduction up into multiple smaller reductions
recursively
c                   > T" / U QUQ5      $ r   r   )r   re  intermediate_loaders     r   intermediate_fn;Reduction.create_multilayer_helper.<locals>.intermediate_fn  s     ''A'A'ABBr   Nr  )r   r  re  r  r   rr   )r  float16bfloat16rr  r  r  r  r-  rs   r  r  r  rn   r  r   r   )r1  r  r-  r  r  r  r  r6  r7  r  r  r  intermediate_dtypeintermediater  r  r  s                   @r   create_multilayer_helper"Reduction.create_multilayer_helper  s$   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%77o8VW
99~
 -Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c                    [        U5      n[        XS-
  -   U5      nU R                  Xr5      nU R                  UUUUUUU
5      nU R	                  UUUUUU/ UQUPU/UUU	5      $ )r  rE   )rn   rA   r  r  r  )r1  r  r-  r  r
  r  r  r  r  r  r/  r.  r  r  r  s                  r   rv  Reduction.create_multilayer  s    & ((89o;UC
##N>00

 ++feL
 	
r   c                j    U R                  UUUUU5      nU R                  UUUUUU/ UQUQUU	SU
5      $ )r  r8  )r  r  )r1  r  r-  r  r
  r  r  r6  r7  r  r  r  s               r   ru  +Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
r   r   r  r  ri  r  r  )
r  r  r  r  r  r  r  r  r   r   r  r   zSequence[Sequence[Expr]]r  r   )r  r  r-  r  r  r  r
  zCallable[_P, OpsValue]r  r  r  r  r  zReductionType | Literal['scan']r.  r   r/  IRNode | Noner   tuple[ReductionHint, _IntLike])
r
  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r  r   z(Callable[[Sequence[_IntLike]], OpsValue])r  r  r-  r  r  r  r
  r	  r  r  r  r  r  rV   r  rZ   r/  r  r   r   r  r   r
  r  r   _NumLike | Sequence[_NumLike])r  r   r  r   r  rZ   r   rZ   )r.  r   r/  r  r   r  )r  Callable[..., OpsValue]r  r  r.  r   r  r   r  r   r  r  r/  r  r   Callable[..., object])r  4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r  r  r  r  r6  Sequence[Integer]r7  r  r   z@Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue])r  r  r-  r  r  r  r  r	  r  r  r  r  r6  
list[Expr]r7  list[Integer]r  rV   r  r   r  rZ   r   r   )r  r  r-  r  r  r  r
  r	  r  r  r  r  r  rV   r  r   r  rZ   r/  r  r   r   )r  r  r-  r  r  r  r
  r	  r  r  r  r  r6  r  r7  r  r  rV   r  rZ   r   r   )"r   r   r   r   r   r!  rk  r^   rv  r|  ry  r  r  rA  r  r  r  r>  rU  rl  rZ   r  r  r  r  r  r  r  r  r  rv  ru  r   __classcell__r  s   @r   r  r    s   ((!!!!N HK(
 
 )

%#LL 3L 	L
 )L 
L=


  %)jjj j )	j
 #j -j 8j j "j 
(j jX )N),) ) 	)
 
2) )V  )6(=(=$(NN N 	N
 %N N )N &N &N "N 
N N` $/	& > DD$/D	&D D %(:G	   &4A	 >  %)('( -( "	(
 ( ( /( "( 
( (T D ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~  %)+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 "+
 
+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
r   r  c                    ^ ^^ SUU U4S jjnU$ )1A closure containing math to read a given elementc                   > Tb  [        U 5      [        T5      :X  d   e[        U 5      [        T5      :X  d   eTn[        U TT5       H  u  p#nUS:w  d  M  XU-  -   nM     U$ r6  )r   r   )r   resultr   stszrX  r  r	  s        r   r  _fixed_indexer.<locals>.indexer*  sj    !c%jCK&???5zSY&&&ufd3KCRQw(* 4 r   )r   r  r   r   r   )r  r	  rX  r  s   ``` r   rT  rT  #  s      Nr   INNER_FN_TYc                  z   ^  \ rS rSr% S\S'                     SU 4S jjr          SS jrSrU =r$ )	MultiOutputReductioni9  r   output_indexc
                   >^ [        T5      (       a  T4m[        T5      S:X  a  TS   n
O      SU4S jjn
[        TU ]  UUU
UUUUUS9  Xl        g )NrE   r   c                4   >^ ^ [        U U4S jT 5       5      $ )Nc              3  4   >#    U  H  o" TT5      v   M     g 7fr   r   )r   r   r   reduction_idxs     r   r   @MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>S  s     HiR]33i   )r   )r   r  	inner_fnss   ``r   r  -MultiOutputReduction.__init__.<locals>.loaderP  s     HiHHHr   r  )r   r  r  r  r   ztuple[OpsValue, ...])callabler   r  __init__r  )r  r  r-  r  r  r  r  r  r  r  r  r  s      `       r   r  MultiOutputReduction.__init__<  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	     N   [         R                  " U R                  U R                  U R                  U R                  X45      5      n[        U[        [        45      (       d   [        U5      5       eXPR                     n[         R                  " U=(       d    SU" U5      U5      $ r  )rq   r  r
  r  r  r
  r   r   r   r   r  r  )r  r  r  r  r  r   r  s          r   r  $MultiOutputReduction.store_reductiona  s     JJNNMM$/	
 &5$-00>$v,>0(()"";#;)WT]ERRr   )r  )r  r  r-  r  r  z#INNER_FN_TY | Sequence[INNER_FN_TY]r  r  r  r  r  rV   r  r  r  rZ   r  r   )
r  r  r  r  r  r  r  r  r   r   )	r   r   r   r   r   r  r  r   r  r  s   @r   r  r  9  s    #)#) #) 7	#)
 "#) ,#) &#) #) &#) #)JSS 3S 	S
 )S 
S Sr   r  c                  j    \ rS rSr\\R                  S4                   SS jj5       rSrg)OnlineSoftmaxReductionis  Nc
           	        ^^^^^^^ [        UUUUUUU4S j[        U5       5       5      n
U
 H  nUR                  5         M     U
$ )z.
Create the reduction disregarding splitting.
c              3  p   >#    U  H+  n[         R                  [        TTTTTS TTU5	      5      v   M-     g7f)r  N)r   r  r  )	r   
output_idxr  r-  r
  r  r  r  r  s	     r   r   0OnlineSoftmaxReduction.create.<locals>.<genexpr>  sO      
 0
 $$+"
  0s   36)r   r   r  )r1  r  r-  r  r
  r  r  
num_outputr  r/  resultsr  s    `````` `   r   r  OnlineSoftmaxReduction.createt  sB       
 
 $J/
 
  AIIK r   r   )r  r  r-  r  r  r  r
  r	  r  r  r  r  r  r   r  rZ   r/  r  r   Sequence[TensorBox])	r   r   r   r   rl  rZ   r  r  r   r   r   r   r  r  s  s     )6(=(=$(!! ! 	!
 %! ! )! ! &! "! 
! !r   r  c                      \ rS rSr\\R                  4               SS jj5       r\      SS j5       r	\                  S	S j5       r
Srg)
WelfordReductioni  c                  ^^^^^ US;   d   e[         R                  R                  (       a   S5       e[        R                  R
                  R                  [        T5      5      nSUUU4S jjn	US:X  a  U	" S5      n
U	" S5      nU	" S5      nXU4$ US:X  aD      SUUUU4S jjmUS:X  a  T" US   5      U	" S5      U	" S5      4$ [        U4S jU 5       5      $ [        R                  TTTUS   TTUUS	9u  pU[        R                  :X  a  UnUS:  a  U R                  TTUTTUUU5      $ [        S
5       Vs/ s H)  n[        R!                  [#        TTUTTUTUU5	      5      PM+     nnU H  nUR%                  5         M     U$ s  snf )N)r  r  zGwelford reduction usage is explicitly disabled, please check you configc                V   >^  SUU 4S jjn[         R                  TTU[        T5      S9$ )Nc                2   > [         R                  " TT5      $ r   r]  )r   r
  rY  s    r   r
  8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    || r   r  r   r  r   rr   ru  r  r   )rY  r
  r  r
  r  s   ` r   const&WelfordReduction.create.<locals>.const  s7      ##!F|	 $  r   r   rE   c                V   >^  SU U4S jjn[         R                  TTU[        T5      S9$ )Nc                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   rd  )r   r   re  r  r  s      r   r
  7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s2    =M&N=Muww||=MO&N!#77 'Orf  r  r  r  )r  r
  r  r
  r  r  s   ` r   copy%WelfordReduction.create.<locals>.copy  s7    8 8 !''!%<	 (  r   r  c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r  s     r   r   *WelfordReduction.create.<locals>.<genexpr>  s     :	"T"XX	r  )r  r.  r   )rY  r   r   r   )r  r  r   r   )rF   mtiadisable_welford_reductionrs   r  r  rs  rn   r   r  r>  rZ   r  rv  r   r   r  r  r  )r1  r  r
  r  r  r  r  r  r.  r  meanm2weightr|  r  r  r  r  r  s    `` ``            @r   r  WelfordReduction.create  s    !FFFF;;88 	
U	
8 ''**33MBR4ST	 	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
 ]222!N19(( 	 	2 $Ah
 '
  $""
 ' 	 
  AIIK %
s   <0F	c                    g)N)r   r   r   r   r  s     r   r  WelfordReduction.default_value	  s     r   c	                  ^ ^^^^^^ [        T5      m[        R                  R                  R	                  [
        R                  " [        TT5      S5      5      (       + n	U	(       aB  US:w  a<          S
U4S jjn
T R                  UTUS   [        U
SS9[        U
SS94UTSTUS9$ [        TTS-
  -   T5      m[        R                  UT[        UU UUU4S jU 5       5      / UQTPT/UU5      nU H  nUR                  5         M             SS jm[        R                  R                  R                  [        U5      5      nT R!                  TX5      n[        R                  UT[        U4S	 jU 5       5      UT/SU5      $ )r  r   r  c                2   > [         R                  " UT5      $ r   r]  )r   r  r  r
  s      r   rq  4WelfordReduction.create_multilayer.<locals>.constant5	  s     ||E511r   r  rE   )r  r
  r  r  r  r  r  r  c           
   3  L   >#    U  H  nTR                  UTTTTS S9v   M     g7f)r   r  N)r  )r   r  r  r1  r.  r  r  s     r   r   5WelfordReduction.create_multilayer.<locals>.<genexpr>M	  s>      
 (F ++$# ,  (s   !$c                    U" / U QUQ5      $ r   r   )r   re  r  s      r   intermediate_loader_fnBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn`	  s    
 4E4O455r   c              3  T   >#    U  H  n[        TUR                  5       S 9v   M     g7f))r  N)r   r-  )r   r   r  s     r   r   r  p	  s&      &A .q}}G&r  )r   r  r  r  r  r   r   rr   )r   r  re  r  r  r  r   rr   )rn   rs   r  r  r  r   r5  rB   rv  r   rA   r  r  r   r  r  r  )r1  r  r
  r  r  r  r  r  r  r  rq  intermediatesr   r  r  r  r.  s   ` `  ` `      @@@r   rv  "WelfordReduction.create_multilayer	  s     ((89((>>HHS%0!4
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 
 (
 
 feL#
& AIIK 	6!	6+	6 9	6 		6 WW%%77f8MN
99:
  && &  G
 	
r   r   N)r  r  r
  r  r  Sequence[Callable[..., Any]]r  r  r  r  r  rV   r  rZ   r   r  r  )r  r  r
  r  r  r  r  r  r  r  r  rV   r  r   r  rZ   r   r  )r   r   r   r   rl  rZ   r  r  r  r  rv  r   r   r   r   r  r    s    )6(=(=yy y 0	y
 y (y &y &y 
y yv $/	& 
 \
\
 \
 0	\

 \
 (\
 &\
 \
 &\
 
\
 \
r   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   \" S 5      S S!U 4S jjj5       rS"U 4S jjr          S#S jrS$S jr	S%S jr
S%S jrS%S jrS&S jrS'S jrS S!S jjr\\R$                  4SS.                   S(S jjj5       r\                  S)S j5       rSrU =r$ )*Scani|	  r  scan_rangesr  =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]rH  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rZ   r  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r  s     r   r   ,Scan.get_free_symbol_uses.<locals>.<genexpr>	       O>N"1m44>Nr  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r  s     r   r   r%  	       Hi"1m44ir  )r  rv  r>   r  r  r  r  s    `r   rv  Scan.get_free_symbol_uses	  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  r  r  r  r  r  r  s    r   r  Scan.__post_init__	  =    4;;#d&6&6"773tyy>IIIr   c                "  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  U R                  U5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r   r
  r   s     r   r   'Scan.store_reduction.<locals>.<genexpr>	       D^x}}^   r  )	r   r   r  rq   r  r!  rH  r  r  )r  r  r  r  	scan_varsr   r  r   s          @r   r  Scan.store_reduction	  sk     ll4+DT^^DD$++t?yy$9gclF;L;L4M
 	
r   c                    g)Ncustomr   r  s    r   ry  Scan.get_reduction_type	  s    r   c                    U R                   $ r   )r  r  s    r   r|  Scan.get_reduction_size	  r  r   c                    U R                   $ r   r  r  s    r   r  Scan.get_size	      yyr   c                    U R                   $ r   r  r  s    r   r-  Scan.get_pointwise_size	  r'  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   r  Scan.index_length	  !    4;;#d&6&6"777r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r:  r  r  rD   r  r   r  r   r  r   s       r   rA  Scan.inner_fn_args	  C    DKK(T--t}}=ll5)vr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ rR  )r:  r  r  rD   r  r   rN   r
  r  rp  r   r  r   s        r   r  Scan.inner_fn_free_symbols	  M    DKK(T--t}}=ll5)#DMM3TTr   T)can_fallback_to_atenc                 ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [        U5      S:  aB  [         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[        R                  UX,   X<   US9PM     sn$ U R!                  UUS   US   TTTUUS9u  p}["        nUS:  at  [$        R&                  R(                  S L =(       d    [*        =(       a	    [,        S:  =(       a    [        U5      S:H  nU(       d  U(       a  S /[        U5      -  $ SnO[.        nSUUU4S jjn[        [        U5      5       Vs/ s H/  n[0        R                  U" S	UX,   UX<   UUTTUUUUS.U	D65      PM1     nnU H  nUR3                  5         M     U$ s  snf s  snf )
NrE   r  r   )r  r
  r
  axispointwise_rangesr  rH  
scan_numelz3.3.0c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r   )r   
scan_indexrN  rO  r  s     r   r   Scan.create.<locals>.reindex
  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r  r
  r!  r
  r  r  r  r  rH  r   r  r  )r   r  rS  r  r   r  r   )rs   r  r  rH   SCANr   TUPLE_REDUCTIONr  rs  rn   r  r   r=  r   ru  r  r>  r  r  versionhip
has_tritontriton_version	SplitScanr   r  )r1  r  r!  r  r  rN  rH  r  rL  r   r  rP  r  r>  	scan_typesupports_splitr   r  r  rO  r  s        `             @@r   r  Scan.create	  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''v;?177#6#6N22$
 $
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7  &)^^)q\-#!! &4 	&
" 	> !!T)Wj.V^w=V% v;!#  "' 6CK//!"J%		? 	?. !&c&k 2%
$ !3#  ! .!&4'+ +)##1!- " !3% 	 
* FNN  AP
s   ;$I3 6I8c	                L   ^^ SUU4S jjn	[         R                  UUUU	UUSUS9$ )Nc                ,   > T" / U S T QUQU TS  Q5      $ r   r   )r   r  rN  r
  s     r   r  #Scan.num_splits.<locals>.wrapper_fn7
  s*    Fc%4jF=F3tu:FGGr   r  )r  r-  r  r
  r  r  r  r.  )r   r  r  r  r   rr   )r  r>  )
r1  r  r
  r
  rN  rO  r  rH  rP  r  s
      ``     r   r>  Scan.num_splits*
  sA    	H 	H ###(!& $ 	
 		
r   r   r  ri  r  )
r  r  r  z%Callable[[Sequence[_IntLike]], Never]r  r  r4  r  r   r   r  r  r  rh  )r  r  r!  r   r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r  r  rN  r   rH  r  r  rZ   rL  r   r   r   r   Sequence[TensorBox | None])r  r  r
  r  r
  r  rN  r   rO  r  r  r  rH  r  rP  r   r   r  )r   r   r   r   r   r^   rv  r  r  ry  r|  r  r-  r  rA  r  rl  rZ   r  r  r>  r   r  r  s   @r   r  r  |	  s   
MMSS!!##-- F#
 
 $
 

 7
 	

 $
 

 8U  )6(=(=` &*`` (` ?	`
 ` ` R` &` #` ` 
$` `D 

 
 7	

 
 (
 #
 R
 
 
(
 
r   r  c                      \ rS rSrSrg)r\  iG
  r   N)r   r   r   r   r   r   r   r   r\  r\  G
  s    r   r\  c                  n  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   \" S 5      SSU 4S jjj5       rS U 4S jjr          S!S jrS"S jr	S#S jr
S#S jrS#S jrS$S jrS%S jrSSS jjr\\R$                  4                   S&S jj5       rSrU =r$ )'SortiL
  r  sort_rangesr  z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rZ   r  r   r  r   r!  r"  r  r   stable
descendingc                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r  s     r   r   ,Sort.get_free_symbol_uses.<locals>.<genexpr>b
  r&  r  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r&   r  s     r   r   rm  e
  r(  r  )r  rv  r>   r  rh  r  r  s    `r   rv  Sort.get_free_symbol_uses]
  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  rh  r  r  r  r+  s    r   r  Sort.__post_init__i
  r-  r   c                6  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  XPR                  U R                  5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r0  s     r   r   'Sort.store_reduction.<locals>.<genexpr>u
  r2  r3  r  )
r   r   r  rq   sortr!  ri  rj  r  r  )r  r  r  r  r  r   r  r   s          @r   r  Sort.store_reductionm
  so     ll40DT^^DD$++v{{DOOLyy$9gclF;L;L4M
 	
r   c                    g)Nru  r   r  s    r   ry  Sort.get_reduction_type{
  s    r   c                    U R                   $ r   )rh  r  s    r   r|  Sort.get_reduction_size~
  r  r   c                    U R                   $ r   r<  r  s    r   r  Sort.get_size
  r>  r   c                    U R                   $ r   r  r  s    r   r-  Sort.get_pointwise_size
  r'  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  rh  r  s    r   r  Sort.index_length
  rC  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r:  r  rh  rD   r  r   rE  s       r   rA  Sort.inner_fn_args
  rG  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ rR  )r:  r  rh  rD   r  r   rN   r
  rI  s        r   r  Sort.inner_fn_free_symbols
  rK  r   c	                T  ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        R                  R                  (       a  [        R                  R                  nOHSn[        R                  R                  =(       a%    U
R                  [        R                  " X5      5      nU(       d  S /[        U5      -  $ [        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [!        [        U5      5       Vs/ s H  n["        R%                  UX.   X>   US9PM     sn$ SUUU4S jjn[!        [        U5      5       Vs/ s H3  n[&        R%                  [)        SUX.   UX>   UUTTUUUUUS.U	D65      PM5     nnU H  nUR+                  5         M     U$ s  snf s  snf )NrE   r  r  c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   rR  )r   
sort_indexrN  rO  rh  s     r   r   Sort.create.<locals>.reindex
  rU  r   )r  r
  r!  r
  r  r  r  rh  r   r  r  ri  rj  )r   r  r  r  r   r  r   )rs   r  r  rH   SORTr   r  rs  rn   rF   r*  decompose_sort_opspersistent_reductionsr  r   r=  r   ru  r  r   rg  r  )r1  r  r!  r  r  rN  ri  rj  r  r   r  
sort_numelis_persistent_kernel
max_rblockr  r   r  r  rO  rh  s        `            @@r   r  Sort.create
  s!    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''77##&&}['AB
 ==++#)==#F#F J33 U22588J3ST ! $6CK''6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7 	? 	?0 !&c&k 2'
& !3%  ! .!&4'+ +##1!-!) $ !3' 	 
, FNN  Q
s   $H 	:H%r   r  ri  r  )
r  r  r  r  r  r  r  r  r   r   r  r  r  r  )r  r  r!  r   r  z'tuple[Callable[[list[Expr]], Any], ...]r  r  rN  r   ri  r   rj  r   r  rZ   r   r   r   rd  )r   r   r   r   r   r^   rv  r  r  ry  r|  r  r-  r  rA  r  rl  rZ   r  r  r   r  r  s   @r   rg  rg  L
  sC    
GG!!##--L F#	
 	
 $	
 

 2
 	

 '
 

 8U  )6(=(=QQ (Q ;	Q
 Q Q Q Q &Q Q 
$Q Qr   rg  c                :     [        U SS9  g! [         a     gf = f)NFfreezeT)rV  r  r   s    r   r  r  
  s&    a. s   
 
c                     [        U SS9u  pUR                  5       (       a  UR                  5         UR                  5       $ ! [         a     gf = fNFr  )rV  should_pad_stridespad_stridesis_contiguousr  )r   _bufferrU  s      r    is_contiguous_storage_and_layoutr  
  sS    /%@ $$&& ##%% s   A A 
AAc           	        [        U [        5      (       a  [        U R                  UUUUUS9$ [        U [        5      (       a5  [        U R                  UUUUUS9u  pgX R                  R                  5       4$ [        U [        5      (       a  U(       as  U(       a6  U R                  5         U R                  5       R                  5       (       d   eO6Ub  U R                  X4S9  O#Ub  U R                  XTS9  OU R                  5         [	        U 5      U R                  5       4$ [        U [        5      (       a#  [        U R                  US9u  pXR                  4$ [        e)z
Try to simplify x into a StorageBox and a Layout.

allow_padding only affect how we apply stride_order. When allow_padding
is True, we have the freedom to add padding when applying the stride_order.
r  want_contiguousstride_orderrU  rc  rU  r  )r   r   rV  rT  rx  r  ry  rQ  r  rV  rd  r  rZ  rU  r  )	r   r  r  r  rU  rc  r   rU  buffers	            r   rV  rV  
  sJ    !Y$FF+%''
 	
 !Z  )FF+%''
	 &&##%%%!V!||~335555)11  2  *22! 3  !!}alln,,!_%% *FF
	 xx
r   c                ^     [        U SS9u  p#UR                  U5      $ ! [         a     gf = fr  )rV  is_stride_orderedr  )r   r  r  rU  s       r   "is_stride_order_storage_and_layoutr  ;  s8    /%@''55 s    
,,c                   [        U [        [        45      (       a  [        U R                  5      $ [        U [
        5      (       a}  U R                  n[        R                  R                  R                  UR                  [        UR                  5      -  [        5      (       + n[        U R                  5      =(       d    U$ [        U [        5      (       a+  U R!                  5       [        R                  R"                  ;   $ gr'  )r   r   rx  is_unalignedrT  rZ  rU  rs   r  r  statically_known_multiple_ofrX  re   r
  rg   ry  rz  unaligned_buffers)r   rU  has_unaligned_layouts      r   r  r  E  s    $J/00DII&&$((#$77#3#3#P#PMMN6<<88/$
  
 DII&>*>>$}}!''";";;; r   c                     \ rS rSr% S\S'   \" S 5      SSS jj5       rSS jrSS jrSS jr	\
S S j5       rS!S	 jrS"S
 jrS#S jrS$S jrS%S jrS&S jrS'S jrS(S jrS)S jrS*S jrS'S jrS'S jrS+S jrS,S jrS-S jrS.S jrSrg)/rv  iW  r   rT  c                8    U R                   R                  U5      $ r   rT  rv  ru  s     r   rv  BaseView.get_free_symbol_uses[  s    yy--m<<r   c                    [        SU  35      e)Nzmake_reindexer NYI on r  r  s    r   make_reindexerBaseView.make_reindexer_  s    !$:4&"ABBr   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r   innerr   s    r   r  &BaseView.make_indexer.<locals>.indexerf      &&r   )r   r  r   r   )rT  r1  r  )r  r  r  r   s     @@r   r1  BaseView.make_indexerb  s4    		&&(%%'	' 	' r   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r  s    r   r  $BaseView.make_loader.<locals>.loadero  r  r   r  )rT  r-  r  )r  r  r  r   s     @@r   r-  BaseView.make_loaderk  s4    		%%'%%'	' 	' r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r
  BaseView.dtypet  s    yy""$$r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  BaseView.get_layoutx      yy##%%r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  BaseView.get_device{  r  r   c                    g r   r   r  s    r   r  BaseView.get_origin_node~  r  r   c                6    U R                   R                  5       $ r   rT  rz  r  s    r   rz  BaseView.get_name      yy!!##r   c                "    U R                  5       $ r   r  r  s    r   r-  BaseView.get_pointwise_size      }}r   c                8    U R                   R                  U5      $ r   rT  rH  rF  s     r   rH  BaseView.mark_reuse      yy##E**r   c                6    U R                   R                  5       $ r   rT  r(  r  s    r   r(  BaseView.has_exceeded_max_reads      yy//11r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  BaseView.realize      yy  ""r   c                8    U R                   R                  5         g r   rT  rL  r  s    r   rL  BaseView.realize_hint  s    		 r   c                6    U R                   R                  5       $ r   rT  rr  r  s    r   rr  BaseView.get_storage_numel      yy**,,r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  BaseView.is_extern      yy""$$r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  5       $ r   )r   rT  rv  r   is_module_bufferr  s    r   r  BaseView.is_module_buffer  s9    $))X..?TYY?.yy))++r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  BaseView.get_read_names      yy''))r   c                    [         R                  " [        SS5         [        U R	                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr=  )r   r   r  rP   r-  r  rk  r  s    r   r  BaseView.get_reads  sD    \\.*:DA&  " e	 BAAs   2A
A'c                z    U n[        U[        5      (       a#  UR                  n[        U[        5      (       a  M#  U$ r   )r   rv  rT  )r  r   s     r   rw  BaseView.unwrap_view  s1    H%%A H%%r   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R                  5       UU R                  5       S9$ r  )r-  r   r   r  ru  r  r  r  s      r   r  BaseView.constant_to_device  sN    !!#n.?HP.."==?	
 	
r   r   Nr  ri  r   *Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r   r   r   r   r   r^   rv  r  r1  r-  r  r
  r  r  r  rz  r-  rH  r(  r  rL  rr  r  r  r  r  rw  r  r   r   r   r   rv  rv  W  s    
LJ'= (=C % %&&$+2#!-%,*	
r   rv  c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr	  SS jr
Srg	)r   i  r  r  c                   [         R                  R                  nU Vs/ s H  n[        R                  " U5      PM     nnU R                  5       nS/[        U5      [        U5      -
  -  [        U5      -   n[        U5      [        U5      :X  d   e[        [        U5      5       H  nX   S:X  a  XE   c   eXE   X'   M  XE   b0  [         R                  R                  R                  XE   5      (       a  MQ  [        U5      (       a  Mc  [        U5      (       a  Mu  X   nXE   nUc   eUc   eXg-
  nUR                  USS9S:X  a  M   SU R                  5        SU SU 35       e   U$ s  snf )zReplace `-1` with correct sizesNr8  r   fallbackzBroadcast failed in ExpandView(, z) on dimension )rs   r  r  r   rL  r  r   r   r   is_size_one_or_falser6   r  )	r   new_sizer  r   old_sizer   v1v2diffs	            r   _normalize_sizeExpandView._normalize_size  sy    77##-56XELLOX6::<6S]S]:;d8nL8}H---s8}%A{b {...&k$(8(8(M(M) ) . /99 [[~%~~%~w..!" /  	 6ajjl^2hZ_`^ab- &< E 7s    E)c                   U R                  X5      n[        U5      (       Ga   [        U5      u  p4[        U5      [        UR                  5      -
  nUS:  d   e[
        R                  R                  /U-  n[        UR                  UR                  5       H^  u  pxUR                  [        R                  R                  R                  U5      (       d  UO[
        R                  R                  5        M`     [        UR                   UR"                  [%        U5      UUR&                  UR(                  5      n	[+        X9S9$ [-        XS9$ )Nr   rS  )rT  r  )r  r  rV  r   r  r   r7  r8  r   r	  r  rs   r  r  r  rW  r  r
  r   rX  rY  rZ  r   )
r1  r   r  r]  r^  skipr_  r	  r  r`  s
             r   r  ExpandView.create  s
   &&q3 ##"7":Gx=3z#77D199'',,$.J #J$5$5z G!!77++@@FF  !H %!!  X!!$$J #CCq00r   c                    U R                   $ r   r<  r  s    r   r  ExpandView.get_size   r>  r   c                   ^^ U R                  5       nU R                  R                  5       m[        U5      [        T5      -
  m    SUU4S jjnU$ )Nc                   > [        U TS  5      n [        U 5      [        T5      :X  d   e[        [        T5      5       H*  nTU   S:X  d  M  [        R                  R
                  X'   M,     U $ r6  )r   r   r   r   r7  r8  )r   r   actualr   s     r   r   *ExpandView.make_reindexer.<locals>.reindex
  s_     tu&Eu:V,,,3v;'!9>$ww||EH ( Lr   r   r  r   r  )r  rT  r   )r  targetr   r  r   s      @@r   r  ExpandView.make_reindexer  sU     ##%6{S[(		!				 		 r   r   N)r   r   r  r  r   r  )r   r   r  r  r   rv  r  r  )r   r   r   r   r   r  r  rl  r  r  r  r   r   r   r   r   r     sA    
% %N 1 14	3r   r   c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr  SS jr	Sr
g	)PermuteViewi  r  dimsc           
        U R                  U5      n[        U5      [        [        [        U5      5      5      :X  d   e[	        U5      (       a  [        U5      u  p4[        UR                  UR                  U Vs/ s H  oTR                  U   PM     snU Vs/ s H  oTR                  U   PM     snUR                  UR                  5      n[        X6S9$ [        XS9$ s  snf s  snf )NrS  )rT  r  )_map_neg_dimsr>   r   r   r  rV  rW  r  r
  r  r	  rX  rY  rZ  r  )r1  r   r  r]  r^  r   r`  s          r   r  PermuteView.create  s      &$:eCI.>#???? ##"7":G$!!  -12T#T2/34t!""1%t4!!$$J #CC-- 34s   :CC$c                X    U Vs/ s H  o"S:  a  UO[        U5      U-   PM     sn$ s  snf r  rR  )r1  r  rH  s      r   r  PermuteView._map_neg_dims/  s+    @DEaxSY_4EEEs   'c                   [        U R                  U R                  5      5      [        [        [	        U R                  5      5      5      :X  d   eU R
                  R                  5       nU R                   Vs/ s H  o!U   PM	     sn$ s  snf r   )r>   r  r  r   r   rT  r  )r  r  r   s      r   r  PermuteView.get_size3  sq    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   8B	c                Z  ^ [        U R                  5       VVs0 s H  u  pX!_M	     snnm[        [        U R                  5      5       Vs/ s H  nTU   PM
     snm[	        T5      [	        [        [        U R                  5      5      5      :X  d   e    SU4S jjnU$ s  snnf s  snf )Nc                8   > T Vs/ s H  oU   PM	     sn$ s  snf r   r   )r   r   invs     r   r   +PermuteView.make_reindexer.<locals>.reindexA  s     '**c!Hc***s   r  )r   r  r   r   r>   )r  r   r<  r   r  s       @r   r  PermuteView.make_reindexer:  s     !*$)) 45 4qt 45$S^454!s1v45#*U3tyy>-B"CCCC	+!	+	+
  65s   B"B(r   N)r   r   r  r  r   rv  )r  r  r   	list[int]r  r  )r   r   r   r   r   rl  r  r  r  r  r   r   r   r   r  r    sB    
. .$ F F,	3r   r  c                  V    \ rS rSr\SS.SS jj5       r\    S	S j5       rS
S jrSr	g)SqueezeViewiI  N)rH  c          	        [        U5      (       Ga]  [        U5      u  p4/ n/ nUbF  [        U[        5      (       d   [	        U5      5       eSU::  a  U[        UR                  5      :  d   e[        [        UR                  UR                  5      5       H  u  nu  pUcT  [        R                  R                  R                  U5      (       d$  UR                  U5        UR                  U	5        M]  M_  Xr:w  a$  UR                  U5        UR                  U	5        M  US:X  a  M   S5       e   [        UR                   UR"                  UUUR$                  UR&                  5      n
[)        X:S9$ Uce  [*        R-                  UUR/                  5        Vs/ s H5  n[        R                  R                  R                  U5      (       a  M3  UPM7     sn5      $ UR/                  5       U   S:X  d   e[*        R-                  U[        UR/                  5       5       VVs/ s H  u  p{Xr:w  d  M  UPM     snn5      $ s  snf s  snnf )Nr   rE   zexpected squeezed size to be 1rS  )r  rV  r   r   r   r   r  r   r   r	  rs   r  r  r  r  rW  r  r
  rX  rY  rZ  r  r  r  )r1  r   rH  r]  r^  r  r_  r   r  r	  r`  r   s               r   r  SqueezeView.createK  s    ##"7":GHJ!#s++6T#Y6+CxC#joo*>$>>>%.s:??JDUDU/V%W!>D;77++@@FF -"))&1 G x -"))&1#qyJ*JJy &X %!!  !!$$J #CC;;; ZZ\)77++@@C )  ::<$)));;q1::<1H"U1HAH11H"UVV #Vs   2H9

H9
H>+H>c                   ^^ U  Vs/ s H  oS:w  d  M
  UPM     nn[        U 5       VVs/ s H  u  p1US:w  d  M  UPM     snnm[        U 5      mSUU4S jjnX$4$ s  snf s  snnf )NrE   c                   > [        U 5      [        T5      :X  d   U  ST 35       e[        R                  R                  /T-  n[	        TU 5       H	  u  p#X1U'   M     [        U5      $ )N )r   r   r7  r8  r   r   )r   r  r   r   lengthnot_ones       r   r   %SqueezeView.squeezer.<locals>.reindex  sb    u:W-C%'/CC-%*WW\\NV$;Igu-!"# .##r   )r   r  r   ztuple[Expr, ...])r   r   )r  r   r  r   r   r"  r#  s        @@r   squeezerSqueezeView.squeezery  sc      $.t!AvAt.!*4;AF1;T	$ 	$    /;s   	AAA A c                    [        S5      e)Nzuse SqueezeView.create())AssertionError)r  rT  s     r   r  SqueezeView.__init__  s    788r   r   )r   r   rH  r  r   r   )r  r  r   z>tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr, ...]]])rT  r   r   r   )
r   r   r   r   rl  r  r  r%  r  r   r   r   r   r  r  I  sC    48 +W +WZ !!	G! ! 9r   r  c                  ~    \ rS rSr% S\S'   S\S'     SS jrSS jrSS jr\r\	        SS	 j5       r
SS
 jrSrg)GenericViewi  r  r  r  r   c                    U R                   $ r   )r   r  s    r   r  GenericView.make_reindexer  s     ||r   c                   [        [        U R                  5      5       Vs/ s H  n[        [        R
                  U5      PM     nn[        U R                  U5      5      nSSR                  [        [        U5      5       SU 3$ s  snf )Nzlambda r  r!  )r   r   r  rm   rD   rm  r   r   r  r  r   )r  r9  	index_old	index_news       r   reindex_strGenericView.reindex_str  sv    CHTYYCX
CXa*4::q9CX 	 
 i01	3sI#6789+FF	
s   $Bc                z    U R                  U R                  SU R                   3SU R                  5        3/5      $ )Nsize=zreindex=)r  rT  r  r1  r  s    r   r!  GenericView.__str__  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                $    U " U[        U5      US9$ )NrT  r  r   )r   )r1  r   r  r   s       r   r  GenericView.create  s     X@@r   c                    U R                   $ r   r<  r  s    r   r  GenericView.get_size  r>  r   r   Nr  r  )r   r   r  r  r   r  r   rv  r  )r   r   r   r   r   r  r1  r!  rk  rl  r  r  r   r   r   r   r+  r+    sp    
77	3
G

 HAA !A <	A
 
A Ar   r+  c                      \ rS rSrSr\S
S j5       r\\SS j5       5       r	\      SS j5       r
\ S       SS jj5       r\ S       SS jj5       rS	rg)r  i  z
This class handles tensor reshaping by computing appropriate index transformations
to map the new shape back to the original storage layout.
c                   [         R                  " U 5      n [         R                  " U5      n[        R                  R                  R
                  R                  nU" [         R                  " U S5      5      (       a  X-   n U $ r  )r   rL  rs   r  r  r   evaluate_exprLt)r   r  r=  s      r   handle_negative_indexView.handle_negative_index  s[    ll3||D!((22@@#q)***C
r   c                  ^ ^^^ [        T[        5      (       d   [        T5      5       eT R                  UR	                  5       T5      u  mm[
        R                  R                  R                  TT5      (       a  U$ [        [        T5      5      S:  =(       d    [        [        T5      5      S:  n[        U5      n        SS jm    SU UUU4S jjnST;   a  SU4S jjnT " U[        T5      US9$ U(       a  T" UT[        R                  T5      5      $ [        U5      (       d  U" U5      $ [!        USS9u  pxUR"                  n	[
        R                  R                  R%                  T5      n
[
        R                  R                  R%                  U	5      n[
        R                  R                  R%                  T5      nSSKJn  U" U
UUUS	9nUb  U Vs/ s H@  n[+        US
5      (       a  UR,                  R.                  O[0        R2                  " U5      PMB     nn[5        UR6                  UR8                  TUUR:                  UR<                  5      n[?        UUS9$ U" U5      $ s  snf )Nr   c                    [        U SS9u  p4[        UR                  UR                  UUUR                  UR
                  5      n[        X5S9$ )NT)r  rS  )rV  rW  r  r
  rX  rY  rZ  )rs  r  r_  r]  r^  r`  s         r   create_reinterpret_view,View.create.<locals>.create_reinterpret_view  sT     #8T"RG$!!  !!$$J #CCr   c                   >  TR                  TT5      nT" U [        T5      US9$ ! [         a5    [        R	                  U 5      n T" U T[
        R                  T5      5      s $ f = f)z
Handle the case where view is not possible with current strides.
Try dynamic_reshape_indexer first; if it fails with unbacked
symbols (guard_or_false can't resolve comparisons), fall back
to making the tensor contiguous.
r7  )r  r   r5   r  require_contiguousr  r  )r   r   r1  rC  r  r  s     r   "handle_unbacked_or_dynamic_reshape7View.create.<locals>.handle_unbacked_or_dynamic_reshape  sm    
55hIXHH.  !33A6.x!B!B8!L s   "& <A%$A%c                4   > [        S/[        T5      -  5      $ r  )r   r   )r   r  s    r   fake_reindex!View.create.<locals>.fake_reindex  s    aS3x=011r   r7  Fr  )_compute_stridesize_obliviousr   rS  )rs  r   r  r  r_  r  r   rZ  r   r   r   r   )r   r   r   ztuple[int, ...]) r   r   r   resolve_negative_sizer  rs   r  r  statically_known_list_equalsr   r4   r  r   r  r  r  rV  r	  to_symints_or_intstorch._subclasses.fake_implsrL  r  r   exprr   r    rW  r  r
  rX  rY  rZ  )r1  r   r  unbacked_symbols_in_sizesr  rG  rJ  r]  r^  
old_strideold_size_symintold_stride_symintnew_size_symintrL  new_stride_symintr   r_  r`  rC  r  s   ` `               @@r   r  View.create  so    (H--=tH~=- 66qzz|XN( 77888LLH %h/014 8(23a7 	" 9;	D	D#1	D?M	D	D			 	, =2 ADNLII *8^>>xH 
 %Q''5a88 4AeD&&
 ''**==hGGG,,??
K''**==hG@ ,4	
 ( +*A  'q&11u}}Q7GG*  
 %!!  !!$$J #
CC 2!44!s   AI1c                P   U Vs/ s H,  n[         R                  R                  R                  U5      PM.     nnU  Vs/ s H,  n[         R                  R                  R                  U5      PM.     n n[	        U5      n[        [        U5      5       HI  nX   S:X  d  M  [        R                  R                  X'   [        [        U 5      [        U5      5      X'     O   [         R                  R                  R                  [        U 5      [        U5      5        X4$ s  snf s  snf )Nr8  )rs   r  r  rs  r   r   r   r   r7  Oner@   rn   check_equals)r  r  r   r   s       r   rP  View.resolve_negative_size2  s     ;CC(QAGG$$--a0(C:BC(QAGG$$--a0(C>s8}%A{b #ggkk&}X'>h@WX	 & 	
%%mH&=}X?VW!! DCs
   3D3D#Nc                     U R                  XU5      nU$ ! [        [        [        4 a=    [	        U5      /nU R                  X5      nU R                  XR5      n[        Xg5      n U$ f = fr   )_dynamic_reshape_indexerr(  r5   
IndexErrorrn   r   )r1  r  r  	dense_dimr   flatr   r   s           r   r  View.dynamic_reshape_indexerC  su    	:228yQG   ;ZH 	:!(+,D33HCH33DCH%h9G	:s    AA('A(c                `  ^^^ [         R                  R                  R                  mSU4S jjn[	        [        U5      5       Vs/ s H  n[        [        R                  U5      PM     snm[        [        TU5      5      n[        U 5      nUSL=(       a&    U[        U5      S-
  :g  =(       a    [        U5      S:H  nU(       a'  Uc   eUR                  U5      nUR                  U5        / mU(       Ga  U(       Ga  UR                  5       n	UR                  5       u  pU	S:X  a=  TR                  [        R                  R                  5        UR                  X45        GOzUS:X  a  UR                  U	5        GOaU" X5      S:X  a  TR                  U
5        GOBU" X5      S:  ar  U" X5      S:  a+  UR                  5       u  pX-  U
-   n
X-  nU" X5      S:  a  M+  TR                  U
5        [         R                  R                  R!                  X5        OU" X5      S:  a  [        R                  R"                  nU	nTR                  [%        XU5      5        X-  nU" X5      S:  aA  UR                  5       nTR                  [%        XU5      5        X-  nX-  n	U" X5      S:  a  MA  [         R                  R                  R!                  X5        O[&        eU(       a
  U(       a  GM  U(       al  UR                  5       n	[         R                  R                  R!                  U	S5        TR                  [        R                  R                  5        U(       a  Ml  U(       aE  UR                  5       u  p[         R                  R                  R!                  US5        U(       a  ME  UbB  [        U5      S:X  a3  TR)                  5         TR                  5       nTR+                  UU5        OTR)                  5         [        T5      [        U 5      :X  d   e    SUU4S jjnU$ s  snf )z7
Perform a reshape entirely by modifying indexing math
c                  > T" [         R                  " X5      5      (       a  gT" [         R                  " X5      5      (       a  gT" [         R                  " X5      5      (       a  g[        R
                  R                  R                  X5      (       a  g[        R
                  R                  R                  X5      (       a  g[        [         R                  " X5      5      e)z
Compare two symbolic sizes, returning -1 if a < b, 0 if a == b, 1 if a > b.

For unbacked symbols, guard_or_false returns False, so we fall back
to divisibility checks.
r   r8  rE   )	r   r5  r>  Gtrs   r  r  r  r5   )r  r  r>  s     r   compare_sizes4View._dynamic_reshape_indexer.<locals>.compare_sizes_  s     ehhqn--ehhqn--ehhqn-- ww<<QBBww<<QBB-ehhqn==r   NrE   r   c                   >^ [        U 5      [        T5      :X  d   [        U 5      [        T5      45       e[        [        TU 5      5      m[        U4S jT 5       5      $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   )ro   )r   r   replacementss     r   r   AView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>  s     HiA|44ir  )r   r   r   r   )r   rm  r  	view_exprs    @r   r   .View._dynamic_reshape_indexer.<locals>.reindex  sO     u:T*CSZT,CC*D% 01LHiHHHr   )r  r   r  r   r   r   r  )rs   r  r  r>  r   r   rm   rD   VIEWr   r   r0  r  r   r7  r8  r^  r]  rC   r(  reverseinsert)r  r  rc  ri  r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r>  r  ro  s                     @@@r   ra  View._dynamic_reshape_indexerT  su    ))88 	>J CHHBV
BVQ*499a8BV
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #1Q  *x2a7  %x2Q6#H7!;&/mmoOD/C/C'3H $H7!;   %  --hAx2Q6''++"  w!GH!+#H7!;'mmoG$$_S7%KL%/G'1H	 $H7!;
   --hA$$; II>  }}HGG))(A6UWW\\* i
 %MMOMCGG))(A6 i  S]a%7"JY
39~X...	I!	I	I 	I Y
s   $P+r   )r   r   r  r   r   r   )r   r   r  r  r   r   )r  r  r  r  r   ztuple[list[Expr], list[Expr]]r   )r  r  r  r  rc  r  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r  r  r  rc  r  r   r  )r   r   r   r   r  r  r?  rl  r   r  rP  r  ra  r   r   r   r   r  r    s    
   k5  k5Z " ",:"	&" "  
 !%	$ % 	
 
0    !%z z z z 
4	z zr   r  c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jr\rSS jr	SS jr
SS	 jr\SS
 j5       rSS jrSS jrSS jrSS jrSS jrSS jr\" S 5       S   SS jj5       rS S!S jjrS"S jrSrU =r$ )#rZ  i  z*Pretend our storage has a different layoutr  rU  c                   > [         TU ]  5         [        U R                  [        5      (       a0  [
        R                  U SU R                  R                  5       5        g g )NrT  )r  r  r   rT  rv  r   r  rw  r+  s    r   r  ReinterpretView.__post_init__  sC    dii**tVTYY-B-B-DE +r   c                P    U R                  U R                  U R                  /5      $ r   )r  rT  rU  r  s    r   r!  ReinterpretView.__str__  s&    		
 	
r   c                6    U R                   R                  5       $ r   r  r  s    r   rz  ReinterpretView.get_name  r  r   c                .    U R                   R                  $ r   )rU  r  r  s    r   r  ReinterpretView.get_device  s    {{!!!r   c                    g r   r   r  s    r   r  ReinterpretView.get_origin_node  r  r   c                .    U R                   R                  $ r   )rU  r
  r  s    r   r
  ReinterpretView.dtype  s    {{   r   c                @    [        U R                  R                  5      $ r   )r   rU  r  r  s    r   r  ReinterpretView.get_size  s    DKK$$%%r   c                @    [        U R                  R                  5      $ r   )r   rU  r	  r  s    r   r:  ReinterpretView.get_stride  s    DKK&&''r   c                   ^  SU 4S jjnU$ )Nc                Z  > TR                   R                  5       n[        R                  " TR	                  5       U" U 5      5      nTR                   R
                  TR                  R
                  :w  a6  [        R                  " UTR
                  TR                  R
                  5      $ U$ r   )rU  r1  rq   loadrz  r
  rT  to_dtype_bitcast)r   r  
tmp_loaderr  s      r   r  +ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   r  r   rr   r   r  r  s   ` r   r-  ReinterpretView.make_loader  s    	" r   c                6    U R                   R                  5       $ r   )rU  r1  r  s    r   r1  ReinterpretView.make_indexer      {{''))r   c                    U R                   $ r   rU  r  s    r   r  ReinterpretView.get_layout  r'  r   c                    g r   r   r  s    r   rQ  ReinterpretView.freeze_layout  rJ  r   c                    [        U R                  R                  U5      [        U R                  R                  U5      -  [        U R                  R                  U5      -  $ r   )r'   rU  r  r	  rX  ru  s     r   rv  $ReinterpretView.get_free_symbol_uses  sQ    
 T[[--}=t{{11=ABt{{11=AB	
r   c           	     p   [         R                  R                  R                  U R                  U R
                  R                  U R
                  R                  U R
                  R                  Ub  UR                  O#[         R                  R                  R                  U R
                  R                  S9$ rw  )rs   r  wrapper_codecodegen_reinterpret_viewrT  rU  r  r	  rX  	writeliner
  r  s     r   r  !ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                    gr6  r   r  s    r   ro  ReinterpretView.num_reads%      r   r   r  r  r  r  r  r  r  r  r  r  r  r   r  r  )r   r   r   r   r  r   r  r!  rk  rz  r  r  r  r
  r  r:  r-  r1  r  rQ  r^   rv  r  ro  r   r  r  s   @r   rZ  rZ    s    4NF

 H$" ! !&(	* -.$)
!
	!
 /

 r   rZ  c                  n    \ rS rSr% SrS\S'   \SS j5       rSS jr\r	\
SS j5       rSS jrSS	 jrS
rg)	DtypeViewi)  z(Pretend our storage has a different typer  target_dtypec                    [        U5      (       aX  [        U5      u  p4[        UR                  UUR                  UR
                  UR                  UR                  5      n[        X5S9$ [        XS9$ )NrS  )rT  r  )
r  rV  rW  r  r  r	  rX  rY  rZ  r  )r1  r   	new_dtyper]  r^  r`  s         r   r  DtypeView.create/  sj     ##"7":G$!!!!!!$$J #CCa88r   c                P    U R                  U R                  U R                  /5      $ r   )r  rT  r  r  s    r   r!  DtypeView.__str__>  s     		4+<+<=>>r   c                    U R                   $ r   )r  r  s    r   r
  DtypeView.dtypeC  s       r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  DtypeView.get_sizeG  r  r   c                P   ^ ^ T R                   R                  5       mSUU 4S jjnU$ )Nc                |   > [         R                  " T" U 5      TR                  TR                  R                  5      $ r   )rq   r  r  rT  r
  )r   r  r  s    r   r  %DtypeView.make_loader.<locals>.loaderM  s*    ''c
D4E4EtyyWWr   r  rT  r-  )r  r  r  s   ` @r   r-  DtypeView.make_loaderJ  s(    		%%'	X 	X r   r   N)r   r   r  r  r   rv  r  r  r  r  )r   r   r   r   r  r   rl  r  r!  rk  r  r
  r  r-  r   r   r   r   r  r  )  sE    29 9? H! !$r   r  c                  v    \ rS rSrSr\          SS j5       r\  S             S	S jj5       rSrg)
	SliceViewiS  ziView that represents a slice along a single dimension.

Corresponds to tensor[..., start:end:step, ...].
c                  ^ ^^^^	^
 [         R                  R                  m
UR                  5       U   m[	        S X4T4 5       5      (       a!  [
        R                  m	[
        R                  mOR[	        S X4T4 5       5      (       a!  [
        R                  m	[
        R                  mOT
R                  m	T
R                  mSUU	U
4S jjm          SUU U4S jjnU" USTS5      nU" XCTT5      nX44$ )zb
Normalize start and end such that both are in the range
[0, x.get_size()[dim]] and start <= end.
c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r4   r   r   s     r   r   0SliceView.normalize_start_end.<locals>.<genexpr>d  s     H1GA$Q''1Gr  c              3     #    U  HJ  n[        U[        5      (       d  M  UR                  [        R                  [        R
                  5      v   ML     g 7fr   )r   r   hasr   MinMaxr  s     r   r   r  g  s;      
 ,!T" (AEE%))UYY''+s
   A5Ac                   > TR                  X5      (       a  U OT" X5      nTR                  X25      (       a  UnU$ T" X25      nU$ r   )statically_known_geqrD  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr  s        r   clamp,SliceView.normalize_start_end.<locals>.clamps  s^    221<<(1BT 
 00FF  
   m3 
  r   c                D   > U c  U$ TR                  U T5      n T" XU5      $ r   )r?  )rY  r  r  r  r  r1  dim_sizes       r   
clamp_wrap1SliceView.normalize_start_end.<locals>.clamp_wrap~  s.     {++C:CU++r   r   )r   r   r  r   r  r   r   r   )
rY  r  r  r   r  r   r  
Expr | intr   r  )
rs   r  r  r  r  r   r  r  evaluate_minevaluate_max)r1  r   rH  startendr  r  r  r  r  r  s   `     @@@@@r   normalize_start_endSliceView.normalize_start_endY  s     77##::<$H%h1GHHHyyHyyH 
 (+
 
 
 yyHyyH,,H,,H		  		 	,	,$'	,03	,>H	,	, 	, 5!Xq1Xx8zr   c           	       ^^^^ [         R                  " T5      m[        T[        5      (       d  TS:  d   T5       e TS:X  a  US:  a  TS:X  a  U$ [        UR                  5       5      mU(       a  U R                  UTTU5      u  mn[        UT-
  TS-
  -   T5      TT'   [        U5      (       a  [        U5      u  px[        UR                  5      n	U	T   T-  U	T'   [        UR                  UR                  TU	UR                  UR                  T   T-  -   UR                   5      n
[#        XzS9$     SUUUU4S jjn[%        UTUS9$ ! [         a     GNf = f)Nr   l    rE   rS  c                   > [        U 5      [        T5      :X  d   SU  ST 35       e[        U 5      n U T   T-  T-   U T'   U $ )Nzwrong ndim r!  )r   r   )r   rH  r  r  steps    r   r   !SliceView.create.<locals>.reindex  sR     u:X.P+eWAhZ0PP.KEsd*U2E#JLr   r7  r  )r   rL  r   r   	TypeErrorr   r  r  rA   r  rV  r	  rW  r  r
  rX  rY  rZ  r  )r1  r   rH  r  r  r  r  r]  r^  r_  r`  r   r  s     `` `      @r   r  SliceView.create  sm    ||D!$%%7471	zcY.419 

%
 00CDJE3 uq!94@ ##"7":Gj//0J(o4JsO$!!  !!J$5$5c$:U$BB$$J #CC	!		 	 ah@@K  		s   E 
EEr   N)
r   r   rH  r   r  r   r  r   r   ztuple[int, int])rE   T)r   r   rH  r   r  r   r  r   r  r   r  r   r   r   )	r   r   r   r   r  rl  r  r  r   r   r   r   r  r  S  s    
 // /),/36/	/ /b  3A3A 3A 	3A
 3A 3A 3A 
3A 3Ar   r  c                  R    \ rS rSr% S\S'   S\S'   SS jrSS jrSS jrSS	 jrS
r	g)BaseConstanti  r  r
  r  r  c                    gNr   r   r  s    r   r  BaseConstant.get_size  s    r   c                    U R                   $ r   r%  r  s    r   r  BaseConstant.get_device  r'  r   c                    g r   r   r  s    r   r  BaseConstant.get_origin_node  r  r   c                    [        5       $ r   r=   r  s    r   r  BaseConstant.get_reads  r  r   r   Nr  r  r  r  )
r   r   r   r   r   r  r  r  r  r   r   r   r   r  r    s"    r   r  c                  R    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrSS
 jrSrg)Constanti  r   r  r  r
  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rq   rq  r  r
  r   r  s    r   r  $Constant.make_loader.<locals>.loader  s    <<

DJJ77r   r  r   r  s   ` r   r-  Constant.make_loader  s    	8 r   c                    g r   r   r  s    r   r  Constant.realize  rJ  r   c                @    [        U R                  U R                  US9$ )N)r  r
  r  )r  r  r
  r#  s     r   r  Constant.constant_to_device  s    djj

6JJr   r   Nr  r  r  )	r   r   r   r   r   r-  r  r  r   r   r   r   r  r    s#    JKr   r  c                  H    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrS
rg)IndexingConstanti  r   r   r  r
  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rq   rM  r   r
  r  s    r   r  ,IndexingConstant.make_loader.<locals>.loader  s    >>$**djj99r   r  r   r  s   ` r   r-  IndexingConstant.make_loader  s    	: r   c                @    [        U R                  U R                  US9$ )N)r   r
  r  )r  r   r
  r#  s     r   r  #IndexingConstant.constant_to_device  s    djj

6RRr   r   Nr  r  )r   r   r   r   r   r-  r  r   r   r   r   r  r    s    JSr   r  c                `   SnSn[        [        [        X5      5      5       H  u  pEUS:X  a  M  [        R                  R
                  R                  XR5      (       d0  [        R                  R
                  R                  XS5      (       d    gU[        R                  " SU5      -  nX$-  nM     gNrE   FT)	reversedr   r   rs   r  r  rO  r   r  )r	  rG  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper	    s     Os51236ww77
 
''""::1RRuyyA. 4 r   c                <    [         R                  U R                  -  $ r   )rF   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper    s    ))U^^;;r   c                  B    \ rS rSrSrSS jrS	S jr S
   SS jjrSrg)r#  i  ztAbstract base for Layout, MultiOutputLayout, NoneLayout.
Represents the memory layout of the output of an Operation.c                >    [        [        U 5      R                  5      er   r,  r  s    r   r  OutputSpec.get_device  r/  r   c                >    [        [        U 5      R                  5      er   r,  r  s    r   storage_sizeOutputSpec.storage_size  r/  r   c                >    [        [        U 5      R                  5      er   r,  ru  s     r   rv  OutputSpec.get_free_symbol_uses  rX  r   r   Nr  r  r  r  )	r   r   r   r   r  r  r  rv  r   r   r   r   r#  r#    s1    C77 %*7!7	!7 7r   r#  c                     \ rS rSrSrS\" S5      S4             SS jjr\SS j5       r\R                  S S j5       r\SS	 j5       r
\
R                  S S
 j5       r
\S!S j5       r\R                  S"S j5       rS#S jr\rS$S jrS%S jrS&S jr\      S'S j5       rS&S jrS(S jrS&S jr\        S)S j5       rS*S jrS&S jrS+S jrS,S jrS-S jrS!S jr\" S 5       S.   S/S jj5       rSrg)0r  i  z_
Layout base class

Carries tensor meta-information including offset and
whether it is pinned.
Nr   Fc                ^   Uc  [         R                  U5      nXl        X l        [	        U5      [	        U5      :X  d   SU SU 35       e[        S U 5       5      (       d   eX0l        X@l        XPl        X`l	        U R                  (       a"  U R                  R                  S:X  d   S5       eg g )Nr4  	, stride=c              3  N   #    U  H  n[        U[        [        45      v   M     g 7fr   )r   r   r   r   s     r   r   "Layout.__init__.<locals>.<genexpr>7  s     <t!:a$--t   #%r&  zOnly CPU tensors can be pinned)r  r  r  r
  r   r   _size_stride_offsetrY  r   )r  r  r
  r  r	  rX  rY  s          r   r  Layout.__init__(  s     >#66t<F
4yCK'H5ix)HH'<t<<<<<
"NN(8(8E(A 	
,	
B(ANr   c                    U R                   $ r   r  r  s    r   r  Layout.sizeA      zzr   c                    Xl         g r   r!  r  r  s     r   r  r"  E  s    
r   c                    U R                   $ r   r  r  s    r   r	  Layout.strideI      ||r   c                    Xl         g r   r'  r%  s     r   r	  r(  M      r   c                    U R                   $ r   r  r  s    r   rX  Layout.offsetQ  r)  r   c                    Xl         g r   r-  r%  s     r   rX  r.  U  r+  r   c                   SnU R                   S:w  a  SU R                    3nU R                  R                  c  SOSU R                  R                   3nSnU R                  (       a  SU R                   3n[	        U 5      R
                   SU R                  R                   U SU R                   SU R                   S	U R                   U U S
3$ )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r"  )	rX  r  r   rY  r   r   r
  r  r	  )r  rX  device_index_stris_pinned_strs       r   r!  Layout.__str__Y  s    ;;! .F!%!2!2!:2!DKKDUDUCV@W>>*4>>*:;MDz""#2dkk&6&6%78H7ITZZL YII;i}VH]O1N	
r   c                    U R                   $ r   r%  r  s    r   r  Layout.get_devicei  r'  r   c           	        [         R                     [        R                  " [	        U R
                  5      [	        U R                  5      U R                  U R                  U R                  S9sS S S 5        $ ! , (       d  f       g = f)N)r
  r  
pin_memory)
rs   	fake_moder  r  ra   r  r	  r
  r  rY  r  s    r   get_exampleLayout.get_examplel  sN    [[&&'		2'4jj{{>> [[s   AA77
Bc                B    [        U R                  U R                  5      $ r   )r	  r	  r  r  s    r   r  Layout.is_contiguousv  s    .t{{DIIFFr   c                    [        U 5      nUS;  d	  U S   S:X  a  g[        U[        U 5      U 5       H  u  p4nUS:w  d  M  X4:w  d  M    g   g)N)r      rE   FT)r   r   r/   )rG  r\  ndimleftrightr  s         r   is_channels_last_contiguous"Layout.is_channels_last_contiguousy  sY     5zvqQ!$*51"
D qyT]"
 r   c                    [        U R                  [        [        R	                  [        [        U R                  5      5      5      5      U R                  5       H  u  pnUS:w  d  M  X:w  d  M    g   gr  )r   r	  r  r  r  r   r  )r  rA  rB  r  s       r   is_transposedLayout.is_transposed  sZ    !$KK^66tHTYY<O7PQRII"
D
 qyT]"
 r   c           	        [        U R                  5      [        U5      :X  d   e[        U R                  5       VVs/ s H5  u  p#[        R
                  R                  R                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H  o R                  U   PM     nnU Vs/ s H  o!U   PM	     nnS	S jnU" U5      nS/[        U5      -  n[        [        U5      5       H  nXR   XqU   '   M     [        [        U5      S-
  5       H_  nXr   XrS-      :  n[        U[        5      (       d2  [        R
                  R                  R                  Xr   XrS-      :  SS9nU(       d  M_    g   gs  snnf s  snf s  snf )
Nr   r  rE   c                d    [        U 5      nU  Vs/ s H  o!R                  U5      PM     sn$ s  snf r   )r  r   )arr
sorted_arrelements      r   sorted_indices0Layout.is_stride_ordered.<locals>.sorted_indices  s,    J=@AS'$$W-SAAAs   -r8  TrM  F)rJ  r  r   r  )r   r	  r   r  rs   r  r  r  r   r   r   
_shape_envr=  )	r  r   r   rH  non_1_indicesr	  rM  stride_orderedrT  s	            r   r  Layout.is_stride_ordered  sl   4;;3u:---
 $DII.
.ww11#1BaG . 	 
 +88-Q++a.-82?@-Qa-@	B
 u% E
*s5z"A'-yN8$ # s5zA~&A!$~!e'<<DdD))ww))77"%1u(==d 8  t ' ;
 9@s   2E)3E) E/E4c                    S/[        [        [        S[        U R                  5      S-
  5      5      5      -   n[        U5      /U-   nU R                  U5      $ Nr   rE   )r   r  r   r   r	  r  rZ  s     r   is_channels_last_stride_ordered&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                  ^^ [        U5      n[        U 5      S:X  a  U $ [        R                  (       d  [        R                  X5      (       a  U $ [        R                  " 5       n[        US5      (       a#  UR                  R                  SS5      (       a  U $ [        S [        R                  " X5       5       5      (       + n[        R                  (       d	  U(       a  U $ [        [        R                  S5      (       a  [        R                  R                   OSmSU4S jjmT(       a  [#        U4S	 jU  5       5      (       a  U $ [%        U T5      n['        U5      n[)        [        U 5      5       Vs/ s H  nSPM     n	nS
XS   '   Sn
[+        US
S S
S9 H  u  pX{S
-
     nX   X   -  n[-        U[.        [0        R2                  45      =(       a!    U[        R4                  :  =(       a    X-  S:g  =(       d1    [-        U[0        R6                  5      =(       a    [        R                  nXU'   U(       d  M  [9        X5      U-  X'   Sn
M     U
(       d  U $ [:        =R<                  S
-  sl        U	$ s  snf )zv
The padding does not change stride order but makes sure all strides larger
than the threshold are multiple of align.
r   ri  dislike_paddingFc              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   r   r   s     r   r   &Layout._pad_strides.<locals>.<genexpr>  s*      
6 q3.//6r   rO  Nc                   > Tc  g[        U [        R                  5      (       d  g[        U4S jU R                   5       5      $ )NFc              3  F   >#    U  H  nTR                  U5      v   M     g 7fr   )is_unbacked_symint)r   r   r   s     r   r   ILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>  s!     R@Q1y33A66@Qs   !)r   r   r   r  r3   )rT  r   s    r   contains_unbacked_symints6Layout._pad_strides.<locals>.contains_unbacked_symints  s7     dEJJ//R@Q@QRRRr   c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r_  s     r   r   rZ    s     N:a6q99:r  rE   )r  T)rT  zsympy.Expr | intr   r   )r  r   rF   pad_channels_lastr  rC  rs   get_current_noder  ri  r  r   rF  chainpad_dynamic_shapesr  rO  r  r  r   r   r   r   r   r   r    padding_stride_thresholdr   r_   r%   num_comprehensive_padding)
in_stridesr  r
  aligncurrent_fx_noderi   r  r   r   new_stridespaddedrankr   prev_idxr	  require_paddingr_  r   s                   @@r   _pad_stridesLayout._pad_strides  s-    $E*z?a''F,N,N-
 -
 ,,.?F++0D0D0H0Hu1
 1
   
__Z6
 
 

 ((Z*1!''<*H*HAGG&&d		S N:NNN'
I>,\:
"'J"89"8Qq"89 &'qM"":ab>;ID!(+H *T^;F 6C#78 (V<<<(Na'P VUZZ0NV5N5N	 
  &#*6#9E#A  <   	))Q.); :s   5I5c                    [        U [        5      (       d   [        U 5      5       eU R                  c   eU R	                  U R                  U R
                  U R                  5      U l        g r   )r   r  r   r	  rp  r  r
  r  s    r   r  Layout.pad_strides  sP    $//;d;/{{&&&''TYY

Kr   c                P    [         R                  =(       a    [        U [        5      $ r   )rF   comprehensive_paddingr   r  r  s    r   r  Layout.should_pad_strides  s    ++P
40PPr   c                   [        U [        5      (       a  U $ U R                  5       (       a  U R                  5         [        U R                  U R
                  U R                  U R                  U R                  U R                  5      $ r   )
r   rW  r  r  r  r
  r  r	  rX  rY  r  s    r   as_fixedLayout.as_fixed  sf    dK((K""$$KKJJIIKKKKNN
 	
r   c                    [         R                  (       d   S[        U 5      R                   S35       eU R	                  5       R                  5       $ )Nzconvert z to FixedLayout first)r  r>  r   r   rx  r1  r  s    r   r1  Layout.make_indexer$  sG    ,, 	
tDz**++@A	
, }}++--r   c                   [        U[        5      =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       ay    U R                  UR                  :H  =(       aY    U R
                  UR
                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r   )r   r  r  r
  r  r	  rX  rY  )r  others     r   __eq__Layout.__eq__*  s    uf% 2u||+2

ekk)2 		UZZ'2 u||+	2
 u||+2 %//1	
r   c                X    [        U R                  U R                  U R                  5      $ r   )r+   r  r	  rX  r  s    r   r  Layout.storage_size5  s    .tyy$++t{{SSr   c                    [        U R                  U5      [        U R                  U5      -  [        U R                  U5      -  $ r   )r'   r  r	  rX  ru  s     r   rv  Layout.get_free_symbol_uses8  s=    
 TYY6t{{M:;t{{M:;	
r   )r  r  r  r  r
  rY  r	  )r  r  r
  r  r  r  r	  zSequence[Expr] | NonerX  r   rY  r   r   r   r  r  r  r   r   r  r  r   r   r   r  r  )r   torch.Tensorr  )rG  r  r\  r  r   r   )r   r  r   r   )rh  r  r  r  r
  r  r   r  r  r   rW  r  )r}  r   r   r   r  r  ) r   r   r   r   r  r    r  r  r  setterr	  rX  r!  rk  r  r:  r  r  rC  rF  r  rU  rp  r  r  rx  r1  r~  r  r^   rv  r   r   r   r   r  r    s    )-qz

 
 	

 &
 
 
 

2   
[[    ]]    ]] 
 HG !,>	 "!F- L!L)7L@KL	L L\L
Q
.	
T H%$)
!
	!
 &
r   r  c                  "    \ rS rSrSrSS jrSrg)rW  iC  z A Tensor layout we cannot changec                X    [        U R                  U R                  U R                  5      $ )r  )rT  r  r	  rX  r  s    r   r1  FixedLayout.make_indexerF  s    diidkkBBr   r   Nr  )r   r   r   r   r  r1  r   r   r   r   rW  rW  C  s    *Cr   rW  c                    ^  \ rS rSrSrSrSS jr\SS j5       r\SS j5       r	\SS j5       r
\      SS j5       r\      SS	 j5       r\SS
 j5       r\R                  SS j5       r\SS j5       r\R                  SS j5       r\S S j5       r\R                  S!S j5       r S"     S#S jjr S"     S$S jjrS%S jrS&S jrS'S jrS(S jr  S)           S*U 4S jjjrSrU =r$ )+r  iK  zp
A Tensor layout that we are allowed to change

Assumption: layout change should NOT add or remove free symbols
Fc                J    [         R                  " U 5      R                  5       $ )z
Compute what the strides would be if this layout were frozen,
without actually modifying the layout. This is used for speculative
stride computation during Triton template code generation.
)r  deepcopyrx  r  s    r   !get_fixed_layout_without_freezing0FlexibleLayout.get_fixed_layout_without_freezingT  s     }}T"++--r   c                    [        U 5      S:X  a  / $ [        R                  R                  /n[	        U SS  5       H  nUR                  X!S   -  5        M     [        [	        U5      5      $ )Nr   rE   r8  )r   r   r7  r]  r  r  r   )sizesreversed_stridesr  s      r   r  !FlexibleLayout.contiguous_strides^  s^    u:?I!GGKK=U12Y'D##DB+?$?@ (H-.//r   c                    [        [        [        U 5      5      5      [        U5      :X  d   X45       e[        R                  R
                  nS/[        U5      -  nU H  nX#U'   X U   -  nM     U$ )z
Create a stride based on the order the dimensions should be filled in.

In this format, channels last would be:
    [1, 3, 2, 0]
N)r>   r   r   r   r7  r]  )r  r   next_strider\  r   s        r   fill_orderedFlexibleLayout.fill_orderedg  sm     %E
+,
50AAQE>QAggkk&3u:%A$AJ%a0K  r   c                    [        [        [        U 5      5      5      [        U5      :X  d   e[        U5      n[        R                  X5      $ )zz
Create a stride based on the sorted order of a permuted range.

In this format, channels last would be:
    [3, 0, 2, 1]
)r>   r   r   r   r  r  )r  r   r   s      r   rQ  FlexibleLayout.stride_orderedx  s@     %E
+,
50AAAA,U3
**5==r   c                D   U[         R                  :X  a  [        R                  U [        5      $ U[         R
                  :X  a  [        R                  U [        5      $ U[         R                  :X  a  [        R                  U 5      $ [        R                  SU5        [        e)a9  
Create a stride based on a memory format.

Memory format is translasted into a stride order,
so channels_last is the same as:
    FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

This interface does not support memory_format `torch.preserve_format`
which should be used to deduce a format from another source
z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r  channels_lastr  rQ  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr  r'  r  r  )r  memory_formats     r    stride_ordered_for_memory_format/FlexibleLayout.stride_ordered_for_memory_format  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                    [        U 5      [        U5      :X  d   e[        R                  R                  R	                  U5      n[        [        [        U5      5      UR                  S9n[        R                  X5      $ )z
Create a stride that has the same stride order as given stride

For example, if given stride is [1000, 1, 100, 10],
the fill order should be [1, 3, 2, 0]
r  )
r   rs   r  r  guarding_hints_or_throwr  r   __getitem__r  r  )r  r	  r   s      r   same_orderedFlexibleLayout.same_ordered  s`     5zS[(((!!99&AE#f+.F4F4FG
**5==r   c                    U R                   $ r   r!  r  s    r   r  FlexibleLayout.size  r#  r   c                4    U R                  SU5        Xl        g )Nr  )!assert_free_symbol_uses_unchangedr  r%  s     r   r  r    s    ..vu=
r   c                    U R                   $ r   r'  r  s    r   r	  FlexibleLayout.stride  r)  r   c                4    U R                  SU5        Xl        g )Nr	  )r  r  r%  s     r   r	  r        ..x?r   c                    U R                   $ r   r-  r  s    r   rX  FlexibleLayout.offset  r)  r   c                4    U R                  SU5        Xl        g )NrX  )r  r  r%  s     r   rX  r    r  r   c                B   U R                  U R                  U5      nU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	rQ  r  r  rp  r
  rW  r  rX  rY  )r  r   rU  r_  s       r   as_stride_orderFlexibleLayout.as_stride_order  sx     ((E:
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                   UnU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[	        U R
                  U R                  U R                  UU R                  U R                  5      $ r   )r  rp  r  r
  rW  r  rX  rY  )r  rc  rU  r_  s       r   as_exact_stridesFlexibleLayout.as_exact_strides  sg     #
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	r  r  r  rp  r
  rW  r  rX  rY  )r  r   r_  s      r   as_fill_orderFlexibleLayout.as_fill_order  st    $($5$5dii$G
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	r  r  r  rp  r
  rW  r  rX  rY  )r  r	  r_  s      r   as_same_orderFlexibleLayout.as_same_order  st    &&tyy&9
""$$**:yy$**MJKKJJIIKKNN
 	
r   c           
     t    0 nS H/  nS H&  nX#4n[        [        [        X5      U5      5      X'   M(     M1     U$ )N)r  r	  rX  TF)r>   r'   r   )r  initial_free_symbolsr   rp  r  s        r   get_initial_free_symbol_uses+FlexibleLayout.get_initial_free_symbol_uses  sI    !0D!.+,6$WT%8-H-$) "/ 1 $#r   c                    S H9  nU R                   X4   n[        [        X#5      5      nXT:X  a  M.   SU SU 35       e   g )Nr  z)Expected free symbols unchanged, but got z vs )r  r>   r'   )r  r   r  rp  old_free_symbolsnew_free_symbolss         r   r  0FlexibleLayout.assert_free_symbol_uses_unchanged  sX    *M#88$9NO)*:5*PQ#7 ;<L;MTRbQcd7 +r   c                   > U(       a  [         R                  X45      nO[         R                  U5      n[        TU ]  XX6US9  U R                  5       U l        g )NrY  )r  r  r  r  r  r  r  )r  r  r
  r  r  rY  r\  r  s          r   r  FlexibleLayout.__init__  sO     $11$EG$77=GK %)$E$E$G!r   )r  r  r  r  r  )r  r  r   r  )r  r  r   r  r   r  )r  r  r   r  r   r  )r  r  r  ztorch.memory_formatr   r  )r  r  r	  r  r   r  r  r  r  r  r  )r   r  rU  r   r   rW  )rc  r  rU  r   r   rW  )r   r  r   rW  )r	  r  r   rW  )r   z$dict[tuple[str, bool], sympy.Symbol])r   r   r  r7   r   r   r'  )r  r  r
  r  r  r  r  Sequence[int | Integer] | NonerY  r   r   r   )r   r   r   r   r  r>  r  r  r  r  rQ  r  r  r  r  r  r	  rX  r  r  r  r  r  r  r  r   r  r  s   @r   r  r  K  s    N. 0 0    	> 	> &&-@&	& &4 >>&8>	> >   
[[    ]]    ]] 
 ;@
"
37
	
" HM
/
@D
	
 

	$ 8<HH H 	H
 5H H 
H Hr   r  c                  p   ^  \ rS rSrSrSU 4S jjrS	S jrS
S jr\" S 5       S   SS jj5       r	Sr
U =r$ )NonOwningLayouti)  z,Is a view into the storage of another tensorc                   > UR                  5       n[        TU ]	  UR                  UR                  UR
                  UR                  5        Xl        g r   )r  r  r  r  r
  r  r	  view)r  r  rU  r  s      r   r  NonOwningLayout.__init__,  s?    "MMLLKKMM		
 	r   c                >    U R                  5       R                  5       $ r   )rx  r1  r  s    r   r1  NonOwningLayout.make_indexer6  s    }}++--r   c                    U R                   R                  5       R                  nUS:X  a  gSSKJn  [
        R                  R                  R                  X5      $ )Nr   TrE   )	ALIGNMENT)	r  r  rX  utilsr  rs   r  r  r  )r  rX  r  s      r   maybe_guard_aligned#NonOwningLayout.maybe_guard_aligned9  sB    %%'..Q;$ww<<VOOr   c                R   [        U R                  [        5      (       d   eU R                  R                  n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eUR                  R                  U5      $ r   )	r   r  rZ  rT  rx  r   ry  rU  rv  )r  rp  boxinput_buffers       r   rv  $NonOwningLayout.get_free_symbol_usesA  s     $))_5555iinn#z**5DI5*xx,//:c:/""77FFr   )r  )r  zBaseView | TensorBoxr   r   r  r  r  r  )r   r   r   r   r  r  r1  r  r^   rv  r   r  r  s   @r   r  r  )  sG    6.P -.$)G!G	!G /Gr   r  c                      \ rS rSrSrSrg)CommBufferTypeiM  symm_memr   N)r   r   r   r   SYMM_MEMr   r   r   r   r  r  M  s    Hr   r  c                  R   ^  \ rS rSr% SrS\S'   S\S'         S	U 4S jjrSrU =r$ )
CommBufferLayoutiQ  a\  
A layout that signifies the buffer is a comm buffer.
In terms of striding, the layout is identical to `FixedLayout`.

Buffers with this layout do not participate in in-place reuse - it can be
neither the source nor the target for in-place reuse.

For detailed motivation and usage of this layout, see
NOTE [lowering-time collective optimization].
r  comm_buffer_typer   
group_namec           	       > [        U[        5      (       a  UR                  5       OUn[        TU ]  UR
                  UR                  UR                  UR                  UR                  UR                  S9  X l        X0l        g )Nr  r
  r  r	  rX  rY  )r   r  rx  r  r  r  r
  r  r	  rX  rY  r  r  )r  rU  r  r  fixedr  s        r   r  CommBufferLayout.__init__`  sh     &0%G%G!V<<++<<<<oo 	 	
 !1$r   )r  r  )rU  zFlexibleLayout | FixedLayoutr  r  r  r   )	r   r   r   r   r  r   r  r   r  r  s   @r   r  r  Q  s;    	 %$O%,% )% 	% %r   r  c                      \ rS rSr% S\S'   \R                  " S S9rS\S'   \R                  " S S9rS\S	'   SS
 jr	SS jr
SS jrSrg)
NoneLayoutis  r  r  c                     S/$ r  r   r   r   r   r  NoneLayout.<lambda>~  s    r   default_factoryr  r  c                     S/$ r  r   r   r   r   r  r    s    1#r   r	  c                    gr  r   r  s    r   r  NoneLayout.storage_size  r  r   c                    U $ r   r   r  s    r   rx  NoneLayout.as_fixed      r   c                    U R                   $ r   r%  r  s    r   r  NoneLayout.get_device  r'  r   r   Nr  r  r  )r   r   r   r   r   r  r  r  r	  r  rx  r  r   r   r   r   r  r  s  sC      !''DD)D#))+FFIFr   r  c                     ^  \ rS rSrSU 4S jjr\SS j5       r\R                  SS j5       rSS jrSS jr	SS jr
\ S       SS jj5       rSS	 jrSS
 jrSrU =r$ )MutationLayoutSHOULDREMOVEi  c                  > [         TU ]  UR                  5       UR                  5       UR	                  5       S 5        Xl        U R                  5       R                  5       n[        R                  R                  U5        g r   )r  r  r$  r  r  r	  
get_bufferrz  rs   r  mark_buffer_mutated)r  r	  r   r  s      r   r  #MutationLayoutSHOULDREMOVE.__init__  sc    &&(OO		
  ))+	##D)r   c                6    U R                  5       R                  $ r   )real_layoutr	  r  s    r   r	  !MutationLayoutSHOULDREMOVE.stride  s    !(((r   c                    g r   r   r%  s     r   r	  r    s    r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.storage_size  s    !..00r   c                   ^ SU4S jjmT" U R                   5      n[        U[        5      (       d   [        U5      5       eU$ )Nc                   > [        U [        5      (       a  T" U R                  5      $ [        U [        5      (       a  T" U R	                  5       5      $ [        U [
        5      (       a  T" U R                  5      $ U $ r   )r   r  r	  rv  rw  
MutableBoxrT  )r	  unwrap_viewss    r   r  ;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  sb    &"<==#FMM22&(++#F$6$6$899&*--#FKK00Mr   )r	  r   r   r   )r	  r   ry  r   )r  r  r  s     @r   r  %MutationLayoutSHOULDREMOVE.get_buffer  s9    	 dkk*&&))74<7)r   c                h    U R                  5       R                  n[        U[        5      (       d   eU$ r   )r  rU  r   r  )r  rU  s     r   r  &MutationLayoutSHOULDREMOVE.real_layout  s,    "))&&))))r   c                   UR                  5         [        R                  R                  UR	                  5       5        [        U[        5      (       a  UR                  nUR                  5         U(       d  [        R                  UR                  5       UR                  5       UR                  5       [        UR                  5       UR                  5       5       VVs/ s H.  u  pE[        R                  R                   R#                  XE5      PM0     snnS9n[        U[$        [&        45      (       d   eUR                  nUR                  5         [)        US5      (       d   U5       e[        UR                  R*                  [,        5      (       d$   [/        UR                  R*                  5      5       e[1        U5      UR                  l        UR                  $ s  snnf )Nr  rT  )r  rs   r  r  rz  r   r   rT  rL  ru  r  r  r  r-  r   r  r  check_equals_and_simplifyrv  r  r  rU  r  r   r  )r1  srcdstunsafe_aliasr  r  r   s          r   realize_into'MutationLayoutSHOULDREMOVE.realize_into  sV    	 	
##CLLN3c9%%((C 	##~~'mmo* !$CLLNCLLN C C GG$$>>qD C	 $ D dXz$:;;;;))CsF##(S(##((//>::QD<QQ:4S9xxs   5Gc                    U $ r   r   r  s    r   rx  #MutationLayoutSHOULDREMOVE.as_fixed  r  r   c                6    U R                   R                  5       $ r   )r	  r1  r  s    r   r1  'MutationLayoutSHOULDREMOVE.make_indexer  r  r   )r	  )r	  r   r   r   r  )r  r   r   r   r  )r   ry  r  r  )r  r   r  r   r  r   r   r   )r   r   r  )r   r   r   r   r  r  r	  r  r  r  r  rl  r  rx  r1  r   r  r  s   @r   r  r    s    	* ) ) ]] 1
 <A%%%%59%	% %N* *r   r  c                    ^  \ rS rSr% S\S'   S\S'   S%U 4S jjrS&S jrS'S jrS(S	 jrS)S
 jr	S*S jr
\S+S j5       rS,S jrS-S jrS.S jrS/S jrS0S jrS1S jrS2S jrS%S jr S3     S4S jjrS5S jrS6S jr S3     S7S jjrS2S jrS8S jrS9S:S jjrS%S jrS;S jrS;S jrS<S jr\ " S 5       S3   S=S  jj5       r!S>S! jr"S?S" jr#S2S# jr$S$r%U =r&$ )@ry  i  r  r   r#  rU  c                F   > [         TU ]  5         U R                  SS 5        g r  )r  r  r  r+  s    r   r  Buffer.__post_init__  s    t4r   c                >    U R                  5       R                  5       $ r   )r  r1  r  s    r   r1  Buffer.make_indexer  s     --//r   c                J    U R                   (       d   U 5       eU R                   $ r   r  r  s    r   rz  Buffer.get_name  s    yy$yyyr   c                    [        U R                  [        5      (       a  U R                  R                  5       $ [	        [        U R                  5      R                  5      er   )r   rU  r  r:  r  r   r   r  s    r   r:  Buffer.get_example  s@    dkk6**;;**,,!$t{{"3"<"<==r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  Buffer.get_device  s    ##%0022r   c                    g r   r   r  s    r   r  Buffer.get_defining_op  r  r   c                6    U R                  5       R                  $ r   )r  r
  r  s    r   r
  Buffer.dtype  s     &&&r   c                :    / U R                  5       R                  Q$ r   )r  r  r  s    r   r  Buffer.get_size  s    ("''((r   c                :    / U R                  5       R                  Q$ r   )r  r	  r  s    r   r:  Buffer.get_stride  s    *"))**r   c                6    U R                  5       R                  $ r   )r  rX  r  s    r   
get_offsetBuffer.get_offset  s     '''r   c                    [        U R                  [        5      (       a  U R                  $ [        [	        U R                  5      R
                  5      er   )r   rU  r  r  r   r   r  s    r   r  Buffer.get_layout  s7    dkk6**;;!$t{{"3"<"<==r   c                    U R                   $ r   r  r  s    r   r  Buffer.get_output_spec  r'  r   c                "    U R                  5       $ r   )r  r  s    r   rr  Buffer.get_storage_numel  s    ~~r   c                6    U R                  5       R                  $ r   )r  rY  r  s    r   get_is_pinnedBuffer.get_is_pinned  s     ***r   c                    [        U R                  [        5      (       a@  [        U R                  [        5      (       d   U R                  R	                  5       U l        g g g r   )r   rU  r  r  rx  r  s    r   rQ  Buffer.freeze_layout  sF    dkk6**:KK4
 4
 ++..0DK4
*r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g Nr  )r   rU  r  r   r  rT  s      r   rV  &Buffer.freeze_layout_with_stride_order%  sB     $++~66IT[[8II6kk11%1Ur   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rU  r  r   r  rZ  s     r   r[  $Buffer.freeze_layout_with_fill_order+  s=    $++~66IT[[8II6kk//6r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rU  r  r   r  r^  s     r   r_  $Buffer.freeze_layout_with_same_order/  s=    $++~66IT[[8II6kk//7r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g r8  )r   rU  r  r   r  rb  s      r   rd  'Buffer.freeze_layout_with_exact_strides3  sG     $++~66IT[[8II6kk22 3 
r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ r  r  r  s    r   r  Buffer.is_zero_elements;  r  r   c                z   ^  T R                  5       (       a  [        [        T R                  5       S9$ SU 4S jjnU$ )Nr  c                   > TR                  5       n[        R                  " TR                  =(       d    SU" U 5      5      $ r  )r1  rq   r  r   r   r  r  s     r   r  "Buffer.make_loader.<locals>.loaderC  s/    '')G88DII2GENCCr   r  )r  r   rs  r  r  s   ` r   r-  Buffer.make_loader>  s3      ""=0@AA	D r   c                "    U R                  5       $ r   rz  r  s     r   r  Buffer.codegen_referenceI  r  r   c                    g r   r   r  s    r   r  Buffer.decide_layoutL  rJ  r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rU  r  r  rz  r  s    r   r  #Buffer.get_inputs_that_alias_outputO  s2    dkk?33KK$$--/00r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rU  r  r	  rz  r  s    r   r  Buffer.get_mutation_namesT  s3    dkk#=>>KK&&//122r   c                6    [        U R                  5       /5      $ r   )r>   rz  r  s    r   r  Buffer.get_read_namesY  s    4==?+,,r   c                    [        5       $ r   r=   ru  s     r   rv  Buffer.get_free_symbol_uses\       |r   c                    [        5       $ r   r=   r  s    r   r  Buffer.get_unbacked_symbol_defsb  r  r   c                    g r   r   r  s    r   r  Buffer.realizee  rJ  r   c                    gr'  r   r  s    r   should_allocateBuffer.should_allocateh  s    r   r  r  r  r  )r   ztorch.Tensor | torch.SymIntr  r  r  r  )r   r  r  r  r  r  r  r  r  r  )r	  r  r   r   )rc  r  rU  r   r   r   r  r   r  r  r  r  r  r  )'r   r   r   r   r   r  r1  rz  r:  r  r  r  r
  r  r:  r*  r  r  rr  r3  rQ  rV  r[  r_  rd  r  r-  r  r  r  r  r  r^   rv  r  r  rZ  r   r  r  s   @r   ry  ry    s$    
50>
3 ' ')+(>
 +1 ;@V"V37V	V78
 CH
*
;?
	
U	

- H%$)!	! &
 r   ry  c                  J    \ rS rSrSS jrSS jr\R                  rS	S jrSr	g)
OperationBufferim  c                    U /$ r   r   r  s    r   r  OperationBuffer.get_outputsp  s	    vr   c                    U $ r   r   r  s    r   r  OperationBuffer.get_defining_ops  r  r   c                X    [         R                  U 5        [        R                  U 5        g r   )ry  r  r  r  s    r   r  OperationBuffer.__post_init__y  s    T"%r   r   Nr  r   r  r  )
r   r   r   r   r  r  r  r  r  r   r   r   r   r]  r]  m  s     #55&r   r]  c                      \ rS rSrSS jrSrg)r  i~  c                    gr6  r   r  s    r   ro  InputBuffer.num_reads  r  r   r   Nr  )r   r   r   r   ro  r   r   r   r   r  r  ~  s    r   r  c                      \ rS rSrSrSrg)DonatedBufferi  aA  
Represents a donated buffer which is a saved tensor that is not alias to any
fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
reuse the input tensor memory during backward since it might be used in another
function. However, donated buffer can be inplace reused during backward
to save memory.
r   N)r   r   r   r   r  r   r   r   r   ri  ri    s    r   ri  c                  8    \ rS rSr% SrS\S'   SS jrS	S jrSrg)
r  i  Nr  r  c                   ^  SU 4S jjnU$ )Nc                   > TR                  5       R                  5       n[        R                  " [        R
                  R                  TR                  5       TR                  5      U" U 5      5      $ r   )	r  r1  rq   r  rs   r  constant_namerz  r  rD  s     r   r  *ConstantBuffer.make_loader.<locals>.loader  sP    oo'446G88%%dmmot7K7KL r   r  r   r  s   ` r   r-  ConstantBuffer.make_loader  s    	 r   c                    [        [        R                  R                  U R	                  5       U5      U R
                  S9$ Nr   rU  )r  rs   r  rm  rz  rU  r#  s     r   r  !ConstantBuffer.constant_to_device  s/    &&t}}?
 	
r   r   r  r  )	r   r   r   r   r  r   r-  r  r   r   r   r   r  r    s    +/O(/
r   r  c                  l    \ rS rSrS	S jr\" S 5       S
   SS jj5       rSSS jjrSS jrSS jr	Sr
g)NoneAsConstantBufferi  c                    [        5       $ r   r=   r  s    r   r  NoneAsConstantBuffer.get_reads  r  r   c                    [        5       $ r   r=   ru  s     r   rv  )NoneAsConstantBuffer.get_free_symbol_uses  rT  r   Nc                J    [         R                  R                  R                  $ r   )rs   r  r  none_strr  s     r   r  &NoneAsConstantBuffer.codegen_reference  s    ww##,,,r   c                    [        S S9$ Nr%  )r  r  s    r   r  $NoneAsConstantBuffer.get_output_spec  s    &&r   c                    gr'  r   r  s    r   r  &NoneAsConstantBuffer.has_tensor_output  r*  r   r   r  r  r  r   r  r  r  )r   r   r   r   r  r^   rv  r  r  r  r   r   r   r   ru  ru    sC     23$)!	! 4
-'r   ru  c                  d    \ rS rSr% S\S'   \" S 5       S	   S
S jj5       rSSS jjrSS jrSr	g)r   i  r   rT  c                .    [        U R                  U5      $ r   )r'   rT  ru  s     r   rv  *ShapeAsConstantBuffer.get_free_symbol_uses  s      		=99r   Nc                h    [         R                  R                  R                  U R                  5      $ r   )rs   r  r  codegen_sizevarrT  r  s     r   r  'ShapeAsConstantBuffer.codegen_reference  s!    ww##33DII>>r   c                    gr'  r   r  s    r   r  'ShapeAsConstantBuffer.has_tensor_output  r*  r   r   r  r  r   r  r  )
r   r   r   r   r   r^   rv  r  r  r   r   r   r   r   r     s<    
J34$):!:	!: 5:
?r   r   c                    ^  \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   SrS\S'   Sr	S\S'   Sr
S\S'   \R                  S'S j5       r\\R                  S'S j5       5       rS(S jrS)S jrS*S jrS+S jrS,S jr\" S 5       S-   S.S jj5       rS/U 4S jjrS0S jrS1S jrS2S jrS3S jr\  S4S j5       r  S5     S6S jjr\ S7           S8S jj5       rS9S  jr S9S! jr!S(S" jr"S0S# jr#S0S$ jr$S:S% jr%S&r&U =r'$ );r  i  zZ
Represents a buffer that is computed during kernel execution rather than being an input.
r  rT  FzClassVar[bool]_force_realizeNr  rx  Callable[..., Any] | Nonery  r  rz  r{  c              #    #    U R                   c   eU R                  c   eU R                  c   eU R                  c   e[	        U R
                  [        5      (       d   [        U R
                  5       5       eU R
                  nU R                  n [        UR                  UR                  U R                  U R                  U R                  UR                  UR                  UR                  S9nX0l        [        UR                  UR                  U R                  5      U l        U R                  R!                  U 5        S v   Xl        X l        g ! Xl        X l        f = f7f)Nr  )rx  ry  rz  r{  r   rT  r  r   rU  r  r
  r  r  r  rW  get_default_sizes_bodyclear_cache)r  old_datar^  new_datas       r   with_original_inner_fn%ComputedBuffer.with_original_inner_fn  s5    +++&&222$$000..:::$))Y//EDO3DE/99[[
	% nn00,,!%!@!@'66",,'66	H !I &%%DK
 ''33D9 I$K !I$Ks   BE%B5E E%E""E%c               #     #    [         R                  n  S[         l        S v   U [         l        g ! U [         l        f = f7fNT)r  r  )	old_values    r   force_realizeComputedBuffer.force_realize  s2      #11		6,0N),5N)IN)s   ?/ ?<?c                    U R                   b  U R                   $ [        U R                  S5      (       a  U R                  R                   $ g)z}
Returns self.name if it exists, otherwise returns the name of the data node if that exists.
If neither exist, returns None.
Nr   )r   r  rT  r  s    r   get_computed_buffer_name'ComputedBuffer.get_computed_buffer_name  s:    
 99 99499f%%99>>!r   c                6    U R                   R                  5       $ r   rT  ro  r  s    r   ro  ComputedBuffer.num_reads  r  r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   r  ComputedBuffer.get_reads  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ComputedBuffer.get_read_names  r  r   c                t   [        U R                  [        [        [        [
        45      (       d.  [        R                  " [        5       [        5       [        5       S9$ [        R                  " [        SS5         U R                  R                  5       (       aT  [        U R                  5       U R                  R                  5       U R                  R!                  5       5      sS S S 5        $ [        U R                  5       U R                  R#                  5       5      sS S S 5        $ ! , (       d  f       g = f)Nrk  writesindex_exprsr>  T)r   rT  r  r  rg  ru  rG   
ReadWritesr>   r   r   r  ry  rP   get_store_functionr-  r|  r  r  s    r   rg  ComputedBuffer.get_read_writes  s    $))itY%GHH** l!|&L  \\.*:DAyy++--*++-II002II002 BA +++-II&&( BAAs   :A*D).1D))
D7c                    U R                   R                  U5      U R                  R                  U5      -  nU R                  5       (       a!  X R	                  5       R                  U5      -  nU$ r   )rU  rv  rT  has_store_functionrg  )r  rp  r  s      r   rv  #ComputedBuffer.get_free_symbol_uses+  sd    $ 11
II**=9: ""$$**,AA-PPFr   c                  > U R                  5       (       dg  U R                  [        R                  R                  ;  a?  U R                  5       S:X  a+  U R                  (       d  U R                  R                  5       $ [        TU ]!  5       $ r  )
ry  r   rs   r  mutated_buffersro  r  rT  r-  r  r+  s    r   r-  ComputedBuffer.make_loaderE  sc    ''))		!8!88 A%'' 99((**w"$$r   c                V    [        U R                  [        [        [        [
        45      $ r   )r   rT  r  r  rg  ru  r  s    r   r  !ComputedBuffer.has_store_functionP  s    $))itY%GHHr   c                   U R                  5       R                  5       R                  5       n[        U R                  [
        [        [        45      (       a+  [        U R                  R                  U R                  U5      $ [        U R                  [        5      (       d   [        U R                  5      5       e[        U R                  R                  U R                  U5      $ r   )r  rx  r1  r   rT  r  r  rg  r   r  r   ru  r   r  )r  r  s     r   r  !ComputedBuffer.get_store_functionS  s    //#,,.;;=dii)T4!89949944diiIIdii33DT$))_D349911499gFFr   c                   [        U R                  [        5      (       Ga  [        R                  " U R
                  R                  5       U R
                  R                  5       5      u  u  pnU R                  5       R                  n[        S U 5       5      (       d   eU VVs/ s Hk  n[        U[        R                  5      (       d  M$  [        UR                  U Vs0 s H%  ofS:w  d  M
  U[        R                  R                   _M'     sn5      PMm     nnnU(       a  [        U R
                  ["        [$        45      (       a  U R
                  R'                  X5      nOUnU Vs/ s H,  n[(        R*                  R,                  R/                  X5      PM.     n	nSSKJn
  U
" XR5                  5       5      $ gs  snf s  snnf s  snf )aD  
If our layout is still flexible, try to determine the stride order based on stride orders of reads.

TODO(jansel): A better algorithm here would look at downstream consumers of this
              value and try to do global graph-level layout optimization.
              This is also something just begging to be autotuned.
c              3  v   #    U  H/  n[        U[        R                  [        R                  45      v   M1     g 7fr   )r   rG   StarDep	MemoryDepr  s     r   r   0ComputedBuffer.get_fill_order.<locals>.<genexpr>j  s2      A 1|33\5K5KLMMs   79r   rE   pick_loop_orderN)r   rU  r  rG   r(  rT  r-  r|  rg  rk  r   r  ro   r   r   r7  r8  r  rg  r   rs   r  r  r*  	schedulerr  r  )r  
index_varsr  r   rk  r3  vr  rT  stride_lengthsr  s              r   r   ComputedBuffer.get_fill_order[  sx    dkk>22.:.M.M		,,.		0L0L0N/+(Z! ((*00E       Aa!7!78 Y
177n$WnUVPV_Q_n$WX   dii$66"ii//
KG(GMR"MRTAGG$$11$@U  " 7&~}}GG# %X"s*   %#F?F? 	F:-F:F?(3G:F?c                    [        U R                  [        5      (       a:  U R                  5       nU(       a  U R	                  U5        g U R                  5         g g r   )r   rU  r  r   r[  rQ  rZ  s     r   r  ComputedBuffer.decide_layout  sC    dkk>22'')E2259""$ 3r   c                p   [         R                  " U R                  5       U R                  5       SS9u  p[        R
                  " [        SU R                  5       5         [        U R                  5       U R                  5       (       a  UOUS S U/UQ76 nS S S 5        / n/ n/ n/ nUR                  5        Hf  u  pXS   ;   a-  U(       a   eUR                  U5        UR                  U	5        M:  XS   ;   d   eUR                  U5        UR                  U	5        Mh     Xg4WXE44$ ! , (       d  f       N= f)Nqr   r  rE   r   )rG   r(  r-  r|  r   r   r  r  rS   r  ry  itemsr  )
r  r   
var_rangesr  r  reduce_vars
index_sizereduce_sizer  r   s
             r   r  %ComputedBuffer.get_default_sizes_body  s-    (::##%t'>'>'@
 \\.*;T__=NO'')0022Ra 	D P 
!#
$$&DAG|&&!!!$!!!$G|#|""1%""1% ' ($0III) POs   7D''
D5c                X  ^ ^^ T R                  5       u  u  p4nu  pgU(       a  U" X44XVU45      u  u  p4nu  pg/ UR                  R                  5       QmUb  [        U[        5      (       a  [        U5      S:X  d   eUu  p[        U[        5      (       d   [        U5      5       e[        U	[        5      (       d   [        U	5      5       e[        S U	 5       5      (       d   eUR                  n
X:X  d	   U
U45       eU	 Vs/ s H  oT;  d  M
  UPM     n	nTU	-  m/ UR                  5       Qm[        R                  R                  T [        R                   5      (       d  TR#                  UR%                  5       5                  SUUU 4S jjnXg-   n['        [)        T 5      5      (       + =(       d    [*        R,                  (       + nU" UUUU5      u  nnnU" X}XN5      u  nnn[.        R0                  " UUSS9u  u  nnn[3        UU" U5      U" U5      /UUU5      nUU4U4$ s  snf )a  
This is a main place where we do loop transformations in a
backend-agnostic way.

Here we:
    1) Remove any 1 dimensions
    2) Fuse contiguous dimensions together
    3) Reorder dimensions based on stride orders

Optional argument extra_indexing_constraints can be used to append additional
indexing expressions to existing ones derived from buffer's body. This can be useful
to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
the scheduler node compatible with other nodes.
Optional argument recompute_sizes_body_func can be used to recompute sizes and body
on the default body. This can be useful to append additional loop transformations.
r   c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r   )r   fs     r   r   6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s     H4Gqz!T**4Gr  c           	       > TR                  XUT5      u  pEnTR                  5       S:X  ae  [        U5      S:X  aV  [        [	        [        U5      5      5      nU" U5      S   S:w  a*  U Vs/ s H  oU   PM	     nn[        U5      n[        U5      nU" U 5      n U(       aD  [        R                  R                  R                  U U[        TX5      5      u  pIn
[        Xi5      nOUnXKU4$ s  snf )Nr  r   r   )_apply_loop_reorderingry  r   r   r   r   r   rs   r  r  _simplify_loopsrK   r   )x_varssupport_varsr  simplify_loopsnewsizesreindex0r   r   r   r   _pruner   index_formulasmemory_addrsr  s               r   simplify_and_reorderAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s     ,0+F+Fe\,(H* &&(E1c%jAoU3u:./ E?1%*278%Qa%H8+E2H.u5H f%F-.WW-=-=-M-M,^VN.*F
 *(="h..!  9s   +C*pr   )
r  Sequence[sympy.Symbol]r  r  r  r  r  r   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]])r  indexing_exprsr   r   r   r   r   r   r   r   r  get_write_exprsrs   r  r  rH   PREFER_STORE_LOOP_ORDERextendget_read_exprsrj   r$  rF   loop_ordering_after_fusionrG   index_vars_no_squeezerS   )r  extra_indexing_constraintsrecompute_sizes_body_funcr  r  r  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r  s   `                     @@r   r  #ComputedBuffer.simplify_and_reorder  s   4 '')		
%Z%Z %
 *)4k1J	))
 94..5578%15u==23q89 :T6!3T::WDAV<WW:1488S$?R:SS8H4GHHHHH"&//&? #%B ? /#.a>2I.   # 11N0--/0ww""4)O)OPP 3 3 564	/*4	/04	/ !4	/ !	4	/

4	/ 4	/l "/t,--VV5V5V1V 	 (<	(
$\1 ,@{,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11#s   	H'H'c           
     n   SSK Jn  Uc  / n U Vs/ s H-  n[        R                  R                  R                  X`U5      PM/     nn[        U5      [        U5      :X  a  [        US   5      [        U 5      :X  d   e[        [        U" XrU5      5      5      nU V	s/ s H  oU	   PM	     nn	U[#        U5      [%        U5      4$ s  snf ! [         a^    [        R                  (       a)  [        R                  S[        [        X5      5      U5        [        [!        [        U5      5      5      n Nf = fs  sn	f )zE
Shuffle the order of loops around to hopefully improve performance.
rE   r  r   z%Did not simplify complex index:
%s
%s)r  r  rs   r  r  r*  r   r   r  	ExceptionrF   r  r'  warningr   r   r   r   r   )
r  r  r  r  priority_idxr  rT  r\  r   r   s
             r   r  %ComputedBuffer._apply_loop_reorderingB  s#    	/L	, )(D   --dM(   w<3|#44WQZCM :   /',"OPQE $))5aq5)l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 4CAC D2C A%D/.D/c                6    U R                   R                  5       $ r   )rT  r-  r  s    r   r-  !ComputedBuffer.get_pointwise_sizej      yy++--r   c                6    U R                   R                  5       $ r   rT  r|  r  s    r   r|  !ComputedBuffer.get_reduction_sizem  r  r   c                6    U R                   R                  5       $ r   rT  ry  r  s    r   ry  !ComputedBuffer.get_reduction_typep  r  r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  ComputedBuffer.is_no_ops  s    yy))++r   c                    gr  r   r  s    r   rZ  ComputedBuffer.should_allocatev  r  r   c                8    U R                   R                  U5      $ )r  rT  r  r#  s     r   r  !ComputedBuffer.constant_to_devicey  s    yy++F33r   rS  )r   Iterator[None]r  r  r  r  r  r  r  r  r  )r   zCallable[..., None])r   list[int] | Noner  )r   zMtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]NN)r  'tuple[dict[Any, Any], list[Any]] | Noner  r  r   z5tuple[tuple[list[Expr], list[Expr]], LoopBody | None]r   )r  r  r  r  r  r  r  zlist[sympy.Expr]r  r	  r   r  r  r  )(r   r   r   r   r  r   r  rx  ry  rz  r{  r  r  r  r  r  r  ro  r  r  rg  r^   rv  r-  r  r  r   r  r]   r  r  r  r-  r|  ry  r  rZ  r  r   r  r  s   @r   r  r    s    K%*NN* #K"481826/6<@ 9@ %  %D 6  6	%%** ,-$)!	! .2	%IG%N% J
J JD OS?CU2$KU2 $=U2 
?	U2n  *.%B*%B,%B %B '	%B
 '%B
%B %BN...,4 4r   r  c                  B    \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S	rg
)FinalizeCodegenResulti~  zNStructured result from TemplateBuffer._finalize_codegen for external backends.r   sourcer   importscall_preamble	call_argsr   N)r   r   r   r   r  r   r   r   r   r   r	  r	  ~  s    XKr   r	  c                  4  ^  \ rS rSrSr   S             SU 4S jjjr\SS j5       rSS jrSS jr	SSS jjr
SS	 jrSS
 jrSS jr  S     SS jjrSS jrS S jr    S!S jr\S"S j5       r\SSSS.           S#S jj5       rSrU =r$ )$r  i  z
Base class for template operators that support epilogue and prologue fusion.
Subclasses: TritonTemplateBuffer (built-in Triton templates),
HelionTemplateBuffer (Helion kernels), etc.
Nc           	       > [         T
U ]  S US9  [        R                  U5      U l        X0l        [        R                  R                  U 5      U l	        [        R                  R                  U 5        0 U l        0 U l        0 U l        U(       a  [        U5      O0 U l        X@l        / U l        Ubm  U R                  S   n[%        U[&        5      (       d   [)        U5      5       eUR+                  5       nU V	s/ s H  n	[-        [/        US9X5      PM     sn	U l        U=(       d
    [1        5       U l        S U l        S U l        g s  sn	f )Nrr  r   r%  )r  r  r  unwrap_storagerq  make_kernel_renderrs   r  register_bufferr   register_operationr   epilogue_fusable_outputs_multi_output_childrenr   _named_inputsmutated_inputsmutation_outputsr   r   r   r  MutationOutputr  r>   allowed_prologue_inpsallow_epilogue_fusionallow_prologue_fusion)r  rU  rq  r	  r	  r	  named_inputsfirst_inputr  r  r  s             r   r  TemplateBuffer.__init__  s1    	d62"11&9"4GG++D1		""4(+- 9;% ?A#
 #/DB 	
 -68%++a.Kk622ED4EE2 ++-F *%)C z8#D)%D! "1Z\ 	"
 37"26"%s   9Ec                    [        U R                  [        5      (       a  [        S5      eU R	                  5       R
                  $ )Nz1Multi-output templates do not have a single dtype)r   rU  MultiOutputLayoutr  r  r
  r  s    r   r
  TemplateBuffer.dtype  s9    dkk#455%C   &&&r   c                     U R                  SS9$ )NT	normalize)rP   r  s    r   rg  TemplateBuffer.get_read_writes  s    ''$'77r   c           	       ^^ [        5       nU R                   H  n[        U[        [        45      (       d   [        U5      5       eUm[        TR                  [        5      (       d   [        TR                  5      5       eTR                  R                  5       mSUU4S jjnU[        R                  " UTR                  5       SUS9R                  -  nM     U$ )z(Build read dependencies from all inputs.c                |   > [        U5      S:X  d   e[        R                  " TR                  5       T" U 5      5      $ r  )r   rq   r  rz  )r   r  rs  inp_indexers     r   dummy4TemplateBuffer._read_deps_from_inputs.<locals>.dummy  s1    6{a'''xxE0BCCr   r   r'	  r   Sequence[Any]r  r0	  r   r   )r>   rq  r   rZ  ry  r   rU  r  r1  rG   rP   r  rk  )r  r(	  rk  inp_rawr-	  rs  r,	  s        @@r   _read_deps_from_inputs%TemplateBuffer._read_deps_from_inputs  s    .8l{{Gg'@AAP4=PA,3Ccjj&11C4

3CC1**113KD D \55s||~rYeE # r   c           	     *  ^^ [        U R                  [        5      (       ar  [        [        R
                  " U R                  5       [        R                  " S5      SSS9/5      n[        R                  " U R                  U5      U[        5       SSS9$ U R                  5       mU R                  5       R                  5       mSUU4S jjn[        R                  " X0R                  5       SUS9nU=R                  U R                  U5      -  sl        U$ )	a  Extract read/write dependencies for this TemplateBuffer.

When the layout is MultiOutputLayout (multi-output templates), the
buffer itself has no data layout, so we cannot build an indexer.
Instead, synthesize a trivial write dep and derive read deps from
the named tensor inputs (``_named_inputs``).  For single-output
templates with a concrete layout, fall through to the standard path.
r   r   )	var_namesr  N)rk  r  r  r  r  c                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ Nr   faker   rq   r  r   r  r  r   s     r   r-	  1TemplateBuffer.extract_read_writes.<locals>.dummy  ,    v;!###99T75>6::r   r'	  r/	  )r   rU  r$	  r>   rG   r  rz  r   r    r  r2	  r  r1  rP   r  rk  )r  r(	  r  r-	  depsr  r   s        @@r   rP   "TemplateBuffer.extract_read_writes  s     dkk#4553= **q)9Rb4F  **11)<&L  }}//#002	; 	; //==?B)
 	

d11)<<
r   c                6    [         R                  R                  $ r   )r   r7  r]  r  s    r   r|  !TemplateBuffer.get_reduction_size  s    ww{{r   c                    g r   r   r  s    r   ry  !TemplateBuffer.get_reduction_type  r  r   c                    gr  r   r  s    r   rZ  TemplateBuffer.should_allocate
  r  r   c                *    U R                  5       / 4S 4$ r   r  )r  r  r  s      r   r  #TemplateBuffer.simplify_and_reorder  s$      
 	
r   c                6    [        U R                  [        5      $ )zFWhether this template produces multiple outputs via MultiOutputLayout.)r   rU  r$	  r  s    r   is_multi_outputs_template(TemplateBuffer.is_multi_outputs_template  s    $++'899r   c                    U R                   $ r   )r	  r  s    r   get_allowed_prologue_inps(TemplateBuffer.get_allowed_prologue_inps  s    )))r   c                    g)ah  Called after epilogue/prologue subgraph codegen with rendered hook outputs.

``hook_outputs`` maps placeholder keys (e.g. ``<STORE_OUTPUT_0>``,
``<LOAD_INPUT_x>``) to the Triton code generated by Inductor for
each fused subgraph.

Return a ``FinalizeCodegenResult`` to provide custom source code and
call metadata, or ``None`` to use the default codegen path.
Nr   )r  hook_outputss     r   _finalize_codegen TemplateBuffer._finalize_codegen!  s     r   c                P   [        U[        5      (       a+  [        UR                  [        5      (       a  UR                  $ [        R                  U5      n[        U[        5      (       a  UR                  n[        UR                  [        5      (       a  UR                  5         U$ )zWRealize a TensorBox, preserving MultiOutput layout (unlike ExternKernel.realize_input).)
r   r   rT  MultiOutputr  realize_inputrx  rU  r  rQ  )r1  r2  r  s      r   realize_template_input%TemplateBuffer.realize_template_input/  sr     b)$$BGG[)I)I77N++B/fj))[[Ffmm^44  "r   )direct_alias_at_leafon_tensor_leafon_non_tensor_leafc               ~   ^^^^^^^ 0 m[         R                  " 5       mSUUUUUUU4S jjm[        T" U/ 5      5      $ )zLWalk a structured output tree, creating MultiOutput nodes for tensor leaves.c           
     v  > [        U [        [        45      (       a?  / n[        U 5       H,  u  p4UR	                  T" U/ UQ[        U 5      U4P5      5        M.     U$ [        T
5      n[        U [        R                  5      (       a  T	(       a  UT	;   a  [        R                  T	U   5      /$ [        U 5      nUT;   a  TU   /$ [        [        R                  U 5      TU5      nUTR                  UR!                  5       '   Tb  T" UR!                  5       XqU5        [        U5      nUTU'   U/$ Tb  T" U5        / $ r   )r   r   r   r   r  r   rl  r  Tensorr   r  idrR	  FallbackKerneltensor_to_layoutr	  rz  )rc  r  r  r   itemleaf_idxtidmor2  rV	  leaf_counterrX	  rW	  seen_outputstemplate_bufwalks            r   rf	  0TemplateBuffer.build_multi_outputs.<locals>.walkJ  s0   &4-00+-(0GANN4.K.K$v,9J.K#LM  1L)H&%,,//'H8L,L%,,-A(-KLMMj,&(-.. "33F;\7 FH33BKKMB!-"2;;="xHr]$&S!t!-"8,Ir   )rc  r   r  zlist[tuple[type, int]]r   list[TensorBox])rF  countr   )	r1  re	  
structuredrV	  rW	  rX	  rc	  rd	  rf	  s	    ` ```@@@r   build_multi_outputs"TemplateBuffer.build_multi_outputs;  s4     .0 (	 	6 T*b)**r   )r	  r	  r	  r	  r	  r   r	  rq  r	  r	  r	  r   )NNN)rU  r#  rq  Sequence[IRNode]r	  r  r	  Iterable[IRNode] | Noner	  OrderedSet[str] | Noner 	  zdict[str, IRNode] | Noner   r   r  r  )r(	  r   r   zOrderedSet[dependencies.Dep]r  )r(	  r   r   r  r  r  r  r		  )r  r
	  r  r  r   z9tuple[tuple[Sequence[Expr], list[Expr]], LoopBody | None]r  )rN	  zdict[str, str]r   zFinalizeCodegenResult | None)r2  r   r   r   )re	  r  rj	  r   rV	  zdict[int, IRNode] | NonerW	  zFCallable[[str, MultiOutput, list[tuple[type, int]], int], None] | NonerX	  zCallable[[int], None] | Noner   ztuple[TensorBox, ...])r   r   r   r   r  r  r  r
  rg  r2	  rP   r|  ry  rZ  r  rH	  rK	  rO	  rl  rT	  rk	  r   r  r  s   @r   r  r    sM    378<150707 !07 6	07
 007  607 /07 
07 07d ' '8$$L
 OS?C
$K
 $=
 
C	
:**	% 	 	  :>;?)+$)+ )+
 7)+)+ 9)+ 
)+ )+r   r  c                     ^  \ rS rSr  S           SU 4S jjjr\" S 5       S	   S
U 4S jjj5       rSS jrSS jrSr	U =r
$ )TritonTemplateBufferih  c                   > [         TU ]  UUUUUS9  U R                  c   eU R                  U R                  0U l        SU l        SU l        g)a  
NOTE:[TritonTemplates with multiple outputs]
We want the ability for TritonTemplates to output multiple tensors. Triton
kernels have no notion of outputs and this is done by creating tensors that
are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
support creating multinode outputs for triton templates.
We work around this by creating an extra input buffer during the lowering
and we mark them as mutated inputs.
)r	  r	  N)r  r  r   r	  subgraph_inpssubgraph_outs)r  rU  rq  r	  r	  r	  r  s         r   r  TritonTemplateBuffer.__init__i  s]    " 	)"7 	 	
 yy$$$)-DII(>%FJ9=r   c                ,  > [         TU ]  U5      nU R                  (       a  U R                  O/ nU R                  (       a  U R                  O/ nU Hz  n[	        U[
        R                  5      (       a  UR                  [        XQ5      5        M>  [	        U[        5      (       a"  UR                  UR                  U5      5        Mu  Uc  Mz   e   U H?  n[	        U[        5      (       a"  UR                  UR                  U5      5        M:  Uc  M?   e   U$ r   )
r  rv  rt	  rs	  r   r   r   updater'   r   )r  rp  resrt	  rs	  rs  r   r  s          r   rv  )TritonTemplateBuffer.get_free_symbol_uses  s     g*=9.2.@.@**b.2.@.@**b C#uzz**

+C?@C((

333MBC{"{ ! !C#v&&

333MBC{"{	 ! 
r   c                     U /U R                   Q$ r   r	  r  s    r   r   TritonTemplateBuffer.get_outputs      -t,,--r   c                &    SU R                    S3nU$ )NzTritonTemplateBuffer(layout=r"  r  )r  r   s     r   r!  TritonTemplateBuffer.__str__  s    ,T[[M;
r   )r	  rs	  rt	  r		  )rU  r  rq  rm	  r	  zCallable[_P, _T] | Noner	  rn	  r	  ro	  r   r   r  r  r  r  )r   r   r   r   r  r^   rv  r  r!  r   r  r  s   @r   rq	  rq	  h  s     378<>> !> 4	>
 0>  6> 
> >< 23$)!	! 4.. r   rq	  c                     ^  \ rS rSrSr          SU 4S jjrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSrU =r$ )ChoiceCalleri  a  
Represents a possible choice used in autotune_process.py.
During autotuning, self.benchmark() is first called to get benchmark result,
and if this choice is selected, self.output_node() is called to get the output_node.

Children classes: TritonTemplateCaller, CUTLASSTemplateCaller.
c                   > [         TU ]  5         Xl        X0l        X l        X@l        SU l        SU l        0 U l        S U l	        S U l
        0 U l        0 U l        g r'  )r  r  r   rU  r   descriptionfailed_benchmark_with_cudagraphsr   rk  decompositiondecomposition_kwargsconfig_patches)r  r   r   rU  r	  r  s        r   r  ChoiceCaller.__init__  s`     		& '!05' ,./38<46!.0r   c                  ^^ U R                  5       mU R                  (       a  [        R                  " UU4S j5      $ [        R
                  (       a  [        UU4S j5      $ [        R                  " TTSU0S S9$ )Nc                    > T " T6 $ r   r   algor   s   r   r  (ChoiceCaller.benchmark.<locals>.<lambda>  s	    T4[r   c                    > T " T6 $ r   r   r	  s   r   r  r	    s	    D$Kr   r   r%  )to_callabler	  rX   benchmark_gpu_with_cuda_graphrF   /profile_bandwidth_with_do_bench_using_profilingrc   	benchmark)r  r   r   r	  s     `@r   r	  ChoiceCaller.benchmark  s\    !**<<=PQQAA+,?@@$$T4%dKKr   c                    [         er   r  r  s    r   	call_nameChoiceCaller.call_name  r  r   c                    [         er   r  r  s    r   r	  ChoiceCaller.to_callable  r  r   c                "    U R                  5       $ )z
Hash key for the underlying kernel. By default, we assume there are no
runtime params, so kernel hash key defaults to choice caller's hash key.
)hash_keyr  s    r   kernel_hash_keyChoiceCaller.kernel_hash_key  s    
 }}r   c                    [         er   r  r  s    r   r	  ChoiceCaller.hash_key  r  r   c                    [         er   r  r  s    r   rl  ChoiceCaller.output_node  r  r   c                    0 $ )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictChoiceCaller.info_dict  r  r   c                    g)Nunsupported_choicer   r  s    r   autoheuristic_idChoiceCaller.autoheuristic_id  s    #r   c                    SU l         g)zp
Mark the choice as failed so that it can be
removed later. Useful for when we decouple
compilation and tuning.
TN)r	  r  s    r   mark_failedChoiceCaller.mark_failed  s     r   )r	  r   r	  r	  r	  r	  r	  rk  r   rU  r   )
r   r   r   r  rU  r  r	  r   r   r   )r   r   r   r  r   rr  r  )r   r	  )r   r   )r   z6dict[str, PrimitiveInfoType | list[PrimitiveInfoType]]r  )r   r   r   r   r  r  r	  r	  r	  r	  r	  rl  r	  r	  r	  r   r  r  s   @r   r	  r	    sp    11 "1 	1
 1 
16L""""$ r   r	  c                      \ rS rSrSS jrSrg)TritonTemplateCallerBasei  c                    [         er   r  r  s    r   get_make_kernel_render/TritonTemplateCallerBase.get_make_kernel_render  r  r   r   N)r   r   )r   r   r   r   r	  r   r   r   r   r	  r	    s    "r   r	  c                     ^  \ rS rSrSr            SU 4S jjr\SS j5       r\SS j5       r S   SS jjr	\
R                  SS j5       rSS jr S   SS	 jjr    SS
 jrSrU =r$ )MultiTemplateBufferi  a3  
Represents a Buffer with multiple backing implementation choices.

Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
Otherwise, the fastest base choice will be chosen.
c                   > [         TU ]  UUS US9  X0l        0 U l        X@l        X l        [        S U 5       5      U l        0 U l        g )N)rU  rq  r	  r	  c              3     #    U  H]  n[        U[        5      =(       dA    [        U[        R                  R                  R
                  5      =(       a    UR                  v   M_     g 7fr   )r   r	  r  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   /MultiTemplateBuffer.__init__.<locals>.<genexpr>  sU      %
 - v78 65??#C#C#V#VW +**
 -s   A%A')	r  r  _choice_timings_fn_choice_timings_choicesoriginal_inputsr   _output_plannable_make_kernel_renders)r  rU  rq  choice_timings_fnunfiltered_choicesr	  r  s         r   r  MultiTemplateBuffer.__init__  sd     	#"7	 	 	
 #4LN,>%!$ %
 -%
 "
 <>!r   c                    U R                   $ )zN
Are all possible choices TritonTemplates or Extern Kernels with out variants
)r	  r  s    r   output_plannable$MultiTemplateBuffer.output_plannable#  s    
 %%%r   c                    U R                   $ r   )r	  r  s    r   r#  MultiTemplateBuffer.choices*  s    }}r   c                z    XR                   ;  a  U R                  U5      U R                   U'   U R                   U   $ r   )r	  r	  )r  hint_overrides     r   choice_timings"MultiTemplateBuffer.choice_timings.  s<      4 44262I2I-2XD  /##M22r   c              #  8  #    [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  UR                  :X  d   eU R                  nUR                  5       U l         S v   X l        g ! X l        f = f7fr   )	r   r  r  r	  TritonTemplateCallerr   rU  r	  r	  )r  callerrenders      r   swap_as_triton_caller)MultiTemplateBuffer.swap_as_triton_caller5  s     EOO44II
 
 	<	 
 {{fmm+++(("("?"?"A	-&,#f#s   BBB BBBc                \   [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  5       UR                  R                  :X  d   eU R                  5       UR                  R                  :X  d   eUR                  5       U l        g r   )r   r  r  r	  r	  r   r  rU  r  r:  r	  r	  r	  )r  r	  s     r   finalize_as_triton_caller-MultiTemplateBuffer.finalize_as_triton_callerC  s    EOO44II
 
 	<	 
 }}&--"4"4444 FMM$8$8888"("?"?"Ar   c                R    U R                  US9n[        X"R                  S9nX2U   4$ )N)r	  r  )r	  r  r  )r  r	  timings
min_choices       r   get_min_choice"MultiTemplateBuffer.get_min_choiceK  s3     %%M%Bkk2
J/00r   c                    UR                  5        H"  u  p#UR                  5       U R                  U'   M$     U R                  S   U l        g)z;Finalize with multiple callers for different hint overridesN)r  r	  r	  r	  )r  callersr	  r	  s       r   finalize_as_triton_callers.MultiTemplateBuffer.finalize_as_triton_callersR  sE     &-]]_!M7=7T7T7VD%%m4 &5 #'";";D"Ar   )r	  r	  r	  r	  r	  r	  r	  )rU  r  rq  rm	  r	  z1Callable[[int | None], dict[ChoiceCaller, float]]r	  list[ChoiceCaller]r	  r  r   r   r  )r   r	  r   )r	  r  r   zdict[ChoiceCaller, float])r	  r	  r   r	  )r	  r	  r   r   )r	  r  r   ztuple[ChoiceCaller, float])r	  z*dict[int | None, TritonTemplateCallerBase]r   r   )r   r   r   r   r  r  r  r	  r#  r	  r  r  r	  r	  r	  r	  r   r  r  s   @r   r	  r	    s    >> !> M	>
 />  /> 
>8 & &   +/3'3	"3 - -B +/1'1	#1BAB	B Br   r	  c                  \   ^  \ rS rSr              SU 4S jjrSS jrSS jrSrU =r$ )	CUTLASSTemplateBufferi]  c                J   > [         TU ]  XU5        X@l        XPl        X`l        g r   )r  r  workspace_sizetemplatesupports_epilogue_fusion)r  rU  rq  r	  r	  r	  r	  r  s          r   r  CUTLASSTemplateBuffer.__init__^  s&     	);<, (@%r   c                8    U R                   b  U R                   $ S$ r  r	  r  s    r   r  (CUTLASSTemplateBuffer.get_workspace_sizem  s    &*&9&9&Et""L1Lr   c                ~    U R                  5        H)  n[        R                  " UR                  5       S S 5        M+     g r   )r  rq   r  rz  )r  rc  s     r   emulate_store_fn&CUTLASSTemplateBuffer.emulate_store_fnp  s,    &&(FIIfoo't4 )r   )r	  r	  r	  )rU  r  rq  rm	  r	  Callable[_P, _T]r	  r   r	  rw   r	  r   r   r   r  r  )	r   r   r   r   r  r  r	  r   r  r  s   @r   r	  r	  ]  sd    AA !A -	A
 A "A #'A 
AM5 5r   r	  c                  T   ^  \ rS rSr            SU 4S jjrSU 4S jjrSrU =r$ )CppTemplateBufferiu  c                L   > [         TU ]  XU5        X@l        XPl        S U l        g r   )r  r  r	  r	  outputs)r  rU  rq  r	  r	  r	  r  s         r   r  CppTemplateBuffer.__init__v  s&     	);< ,0r   c                  > [        U R                  [        5      (       a  [        U R                  [        5      (       d   [        U R                  5      5       eU R                  S   n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eU$ [        TU ]%  5       $ r  )
r   rU  r$	  r	  r   r   ry  r  r  r  )r  first_outputrU  r  s      r   r  CppTemplateBuffer.get_layout  s    dkk#455dllH55ItDLL7II5<<?LlF33GT,5GG3!((Fff--;tF|;-M7%''r   )r	  r	  r	  )rU  r  rq  rm	  r	  r	  r	  rw   r	  r   r   r   r  )r   r   r   r   r  r  r   r  r  s   @r   r	  r	  u  sL    11 !1 -	1
 "1 1 
1
( 
(r   r	  c                  V   ^  \ rS rSrSr S           SU 4S jjjrSS jrSrU =r$ )	CuteDSLTemplateBufferi  z
Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
Similar to other template buffers but specialized for CuteDSL operations.
c                  > [         TU ]  XU5        X@l        XPl        U /U l        Ub  [        U R                  S   [        5      (       d   [        U R                  S   5      5       eU R                  S   R                  5       nU =R                  U Vs/ s H  n[        [        US9Xp5      PM     sn-  sl        g g s  snf )Nr   r%  )r  r  r	  r	  r	  r   rq  r   r   r  r	  r  )	r  rU  rq  r	  r	  r	  r  r  r  s	           r   r  CuteDSLTemplateBuffer.__init__  s     	);< ,&*V%dkk!nf55KtDKKN7KK5[[^..0FLL))C z8#D) L &s   B>c                    U R                   $ r   r	  r  s    r   r  !CuteDSLTemplateBuffer.get_outputs      ||r   )r	  r	  r	  r   )rU  r  rq  rm	  r	  r	  r	  r   r	  rn	  r   r   r  )	r   r   r   r   r  r  r  r   r  r  s   @r   r	  r	    s[     37 ! -	
  0 
 * r   r	  c                     ^  \ rS rSrSr     S                     S	U 4S jjjrS
S jrSS jr S     SS jjrSr	U =r
$ )NVUniversalGemmBufferi  z
Buffer for NVIDIA Universal GEMM kernels.

Unlike CuteDSL templates which use Jinja templates, this generates
simpler Python code that directly calls the cutlass_api library.
c                  > [         TU ]  XS S9  X0l        X@l        U /U l        X`l        XPl        Xpl        Xl        Xl	        Xl
        UR                  R                  UR                  R                  S.U l        U R                  U l        g )N)r	  )kernel_namemin_cc)r  r  kernelaccumulator_typer	  r	  variantscale_type_ascale_type_bswizzle_type_aswizzle_type_bmetadatar 
  r
  kernel_metadata_make_kernel_renderr	  )r  rU  rq  r
  r
  r
  r	  r
  r
  r
  r
  r  s              r   r  NVUniversalGemmBuffer.__init__  s     	DA 0&*V,((,, "??66oo,, 
 #'":":r   c                    U R                   $ )z#Return the workspace size in bytes.r	  r  s    r   r  (NVUniversalGemmBuffer.get_workspace_size  s    """r   c                    U R                   $ r   r	  r  s    r   r  !NVUniversalGemmBuffer.get_outputs  r	  r   c                  ^	 SSK Jn  SSKJn  / nU R                   HV  n[        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  nUR                  U5        MX     [        UR                  5      nU" UUUU R                  U R                  U R                  U R                  U R                   U R"                  U R$                  U R&                  S9m	U	4S jnT	U4$ )z
Create a kernel renderer for code generation.

Returns (kernel, render) tuple where:
- kernel: NVUniversalGemmKernel object with call_kernel() method
- render: function that returns source code string
r   )NVUniversalGemmKernel)Placeholder)r 
  r   rl  r

  r
  r	  r
  r
  r
  r
  r
  c                 $   > T R                  5       $ r   )r	  )render_kernels   r   r	  9NVUniversalGemmBuffer._make_kernel_render.<locals>.render  s     ''))r   )Btorch._inductor.codegen.nv_universal_gemm.nv_universal_gemm_kernelr
  torch._inductor.utilsr
  rq  r   r   rT  rx  r  r   KERNEL_NAMEr

  r
  r	  r
  r
  r
  r
  r
  )
r  out_noder	  r
  r
  r   rs  r 
  r	  r
  s
            @r   r
  )NVUniversalGemmBuffer._make_kernel_render  s    	
 	6!#;;C#y))hh#z**hhs#  +112-##  00!22..LL****....
	* f$$r   )r
  r
  r

  r	  r	  r
  r
  r
  r
  r
  r	  )r   NNNN)rU  r  rq  rm	  r
  r   r
  r   r
  r   r	  r   r
  
Any | Noner
  r
  r
  r
  r
  r
  r   r   r  r  r   )r
  r   r	  r  r   ztuple[Any, Any])r   r   r   r   r  r  r  r  r
  r   r  r  s   @r   r	  r	    s      #'#'%)%);; !; 	;
 ; ; ; !; !; #; #; 
; ;B# :>*%*%,6*%	*% *%r   r	  c                &    [        S U  5       5      $ )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r   r   )r   r9  s     r   r   #is_node_sequence.<locals>.<genexpr>  s     4ez!V$$er  )r   )r   s    r   is_node_sequencer!
    s     4e444r   c                      \ rS rSr% S\S'   SS jrSS jrSS jr\SS j5       r	\
    SS j5       rSS	 jrSS
 jr\" S 5       S   SS jj5       rSrg)r  i  #Sequence[IRNode | Sequence[IRNode]]rq  c                n    U R                   U   n[        U[        5      (       d   eUR                  5       $ r   rq  r   r   rz  )r  r   inputs      r   
input_nameInputsKernel.input_name  s/    A%((((~~r   c                  ^ [         [        R                     " 5       n[        R                  mU R                   Hq  n[        U[        5      (       a  UR                  U4S jU 5       5        M5  [        U[        5      (       a  ML  UR                  T" UR                  5       5      5        Ms     [         [        R                     " U4S jU R                  5        5       5      n[        R                  " UU[        5       S9$ )Nc              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   rH  )r   r   r  s     r   r   /InputsKernel.get_read_writes.<locals>.<genexpr>   s     BEqWQZZ\22E   #&c              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   rH  )r   r  r  s     r   r   r+
  '  s#      .
/AGCLLN##/Ar,
  r  )r>   rG   rM   r  rq  r   r   rw	  r   r  rz  r  r  )r  rk  r&
  r  r  s       @r   rg  InputsKernel.get_read_writes  s    <++,.&&[[E%**BEBBE#899		'%.."234 ! L,,- .
/3/?/?/A.
 
 &&"
 	
r   c                6    U R                  5       R                  $ r   rj  r  s    r   r  InputsKernel.get_reads1  rm  r   c                   [        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  n[        U[        5      (       a*  [        U[
        5      (       d  [        R                  U5      n[        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U$ [        U[        [
        45      (       d   [        U5      5       eU$ r   )r   r   rT  rx  rv  rZ  r  rS	  unwrap_storage_for_inputTorchBindObjectry  r   r1  r   s     r   r2
  %InputsKernel.unwrap_storage_for_input4  s    a##Aa$$Aa"":a+I+I**1-Aa##
 //22a))H!fo677@a@7r   c                    / nU  Hd  n[        U[        5      (       a&  U Vs/ s H  n[        R                  U5      PM     nnO[        R                  U5      nUR	                  U5        Mf     U$ s  snf r   )r   r   r  r2
  r  )rq  
inputs_newr   r   s       r   r	  InputsKernel.unwrap_storageG  sm     79
A!X&&GHIq!\::1=qI 99!<a   	 Js   A/c                    gr  r   r  s    r   r  InputsKernel.is_externT  r  r   c                    gr6  r   r  s    r   ro  InputsKernel.num_readsW  r  r   c                    [         [        R                     " 5       nU R                   HI  n[	        U[
        5      (       a  X#R                  U5      -  nM-  U H  nX$R                  U5      -  nM     MK     U$ r   )r>   r   r!   rq  r   r   rv  )r  rp  r3  rs  	inner_inps        r   rv  !InputsKernel.get_free_symbol_usesZ  sg     u||$&;;C#v&&--m<<!$I77FFA "%	  r   r   N)r   r   r   r   r  r  rO  )rq  r#
  r   zlist[IRNode | Sequence[IRNode]]r  r  r  r  )r   r   r   r   r   r'
  rg  r  rl  r2
  r  r	  r  ro  r^   rv  r   r   r   r   r  r    s    // 

,,  $ 
3
	(
 
 N+$)
!
	!
 ,
r   r  c                  (    \ rS rSrSS jrSS jrSrg)	NopKernelih  c                    gr  r   r  s    r   r  NopKernel.is_no_opi  r  r   c                    [        5       $ r   r=   r  s    r   r  NopKernel.get_readsl  r  r   r   Nr  r  )r   r   r   r   r  r  r   r   r   r   rA
  rA
  h  s    r   rA
  c                      \ rS rSrSr\S
S j5       r\ S     SS jj5       r\" S 5       S   SS jj5       r	\SS j5       r
SS jrS	rg)ConcatKernelip  zb
There isn't actually a real kernel for concat, we just change the
storage for the upstream data.
c                
   US   R                  5       nUS   R                  5       n[        US   R                  5       5      nS/nXR   /nSUs=::  a  [	        U5      :  d   e   e[        S[	        U5      5       H  nX   R                  5       n	UR                  XR   5        [	        U	5      [	        U5      :X  d   eX   R                  5       U:X  d   eX   R                  5       U:X  d   e[        [	        U5      5       HE  n
X:X  a  XZ   X   -   XZ'   M  [        R                  R                  R                  XZ   X   5      XZ'   MG     UR                  XR   5        M     [        R                  U5      n[        R                  (       a#  [        R!                  XUS   R"                  5      n[        [	        U5      5       H|  nX   n[%        U5      (       d  M  UR'                  5       n[)        U[*        5      (       d  M@  [        R-                  UR.                  UR0                  5      (       d  Mq  [3        U5      n  O   [5        S U 5       5      n[        R                  R6                  R8                  S   nUSL a7  [)        U[        5      (       a"  [5        S U 5       5      (       a  [3        U5      n[;        S U 5       5      nUc   e[=        S[+        UUUUUS9/ S	9n[?        U5      n/ n[A        U5       GH  u  nn[)        U[B        [D        45      (       d   [G        U5      5       eU RI                  U[J        RM                  UX&U   Xx   SS
95      n[)        U[N        5      (       d   [G        U5      5       e[)        URP                  [        5      (       d   [G        URP                  5      5       eURP                  R                  U5        [)        URR                  [B        5      (       a  URR                  RU                  5       nOURR                  n[)        U[>        5      (       d  GM4  URW                  5       (       d  GML  UR                  5       =nc  GMb  [Y        URF                  5      (       d  GM  [[        U5      (       a  GM  UR                  UR]                  5       5        GM     [	        U5      S:  aR  [        R                  R_                  U[`        Rb                  5      (       a  [        R                  Re                  U5        [        R                  Rg                  U5      Ul4        U Rk                  URP                  5      Ul(        [        R                  Rm                  U5        U$ )z&
Create the concat kernel from inputs
r   rE   c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r  r  s     r   r   &ConcatKernel.create.<locals>.<genexpr>  s     -WPV1.CA.F.FPVr  Fc              3    #    U  Hv  nS UR                   ;   =(       a[    UR                   S    R                  [        R                  S9=(       d*    UR                   S    R                  [        R                  S9v   Mx     g7f)rY  r  N)ri  r  r  r  r  r   args     r   r   rJ
    sx       (C !  HHUO11@S@S1T xx44&+&<&< 5  (s   A>B c              3  z   #    U  H1  n[        U5      =(       a    UR                  5       R                  v   M3     g 7fr   )r  r  rY  r  s     r   r   rJ
    s-      
KQa!!$A)A)AA6s   9;N)r  r
  r  r	  rY  r   rU  rq  r  )7r  r  r   r  r   r   r  rs   r  r  r  r  r  rF   ru  r  rp  r
  r  r  r   rW  rC  r  r	  r/   r  current_noder   r   rG
  rx  r   rv  r  r   r  r  r  ry  rq  rT  rw  r>  rj   ri   r  r  rH   FOREACHregister_operation_listr	  r   r	  r	  )r1  rq  rH  r  r
  r  offsets_startoffsets_endr   
input_sizer<  output_strider   rU  any_input_is_storage_and_layoutfx_node_argsrY  concat_kernelr
  op_namesrs  r  input_unwrappeddevs                           r   r  ConcatKernel.createv  s   
 %%'q	##%q	**,-}oC'#h-'''''q#f+&A++-J  /z?c(m3339&&(E1119'')V3333x=)8"*+
"=HK"#''"2"2"L"L Z]#HK	 * x}- ' (6'H'H'R''"//M
 s6{#A	A$Q''K 88fmmTT$B8$LM $ +.-WPV-W*W'ww++003 ,u4<..  (   ;8DM 
KQ
 
	 !!!$$# 

 M*'FAscHj#9::EDIE:++  Cq!1;> ! L lF33GT,5GG3m22D99U4@T@T;UU9  ''5#((H--"%(("6"6"8"%(( ?J77#3355NN,,S9388$$"<00 ? ? AB1 (4 x=1!4!4V^=S=S!T!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                2   [        U[        5      (       a  U R                  UR                  U5      $ [        U[        [
        45      (       d   [        U5      5       e[        UR                  [        5      (       a  [        UR                  R                  [        5      (       a  UR                  R                  (       d  gUc  g[        UR                  5       5      [        UR                  5       5      :w  a  g[        S [        UR                  5       UR                  5       5       5       5      $ [        UR                  S5      =(       aJ    [        UR                  R                  [         5      =(       a    [        UR                  ["        5      (       + $ )NFTc              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rN  rP  s      r   r   =ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s1      EFB   88@@ErR  rU  )r   r   can_realize_into_without_copyrT  rv  rx  r   r	  rU  rW  r	  r   r:  r   r   r  r  ExternKernelAlloc)r1  r  r  s      r   rc
  *ConcatKernel.can_realize_into_without_copy  s!    c9%%44SXXsCC#*566AS	A6chh 344sxx<<xx00 { 3>>#$CNN,<(== !#.."2CNN4DE   CHHh' <388??N;<sxx):;;	
r   c                ,    [         R                  X5      $ r   )rA
  rv  ru  s     r   rv  !ConcatKernel.get_free_symbol_uses  s     --dBBr   c                   [        U[        5      (       d&  [        U5      (       a  [        U5      u  p4[        X4S9n[        U[        5      (       d   [	        U5      5       e[        U[
        5      (       a  U R                  UR                  U5      $ [        U[        5      (       ai  UR                  5         [        UR                  S5      (       d   eU R                  X5      (       a&  [        U5      UR                  l        UR                  $ [        R                  UR!                  5       UR#                  5       UR%                  5       ['        UR)                  5       UR)                  5       5       VVs/ s H.  u  pV[*        R,                  R.                  R1                  XV5      PM0     snnS9nU R                  Xr5      $ s  snnf )NrS  rU  r  )r   rZ  r  rV  r   r   r  rT  rx  r  r  rc
  r  rU  ru  r  r  r  r-  r   r  rs   r  r  r  )r1  r  r  r]  rU  r  r  pws           r   r  ConcatKernel.realize_into  sZ   
 #//$S))"7"<%7B#//:c:/c9%%##CHHc22c:&&KKM388X....00::"1#"6xx>>#--/__&  ??DA   ::1@?	  
 ((s   75Gc                    gr  r   r  s    r   rZ  ConcatKernel.should_allocate:  r  r   r   )rq  rm	  rH  r   r   rx  r   )r  r   r  r  r   r   r  r  )r  r   r  r   r   r   r  )r   r   r   r   r  rl  r  rc
  r^   rv  r  rZ  r   r   r   r   rG
  rG
  p  s    
 u un /3!
!
,!
	!
 !
F N+$)C!C	!C ,C
 ) )Br   rG
  c                  $  ^  \ rS rSr% SrSrS\S'   \R                  " \	S9r
S\S'   S	rS
\S'   S	rS\S'   S	rS\S'   \R                  " \S9rS\S'   S	rS\S'   S	rS\S'   \R                  " \	S9rS\S'   S	rS\S'   \R                  " \	S9rS\S'   \R                  " \S9rS\S'          SC                     SDU 4S jjjrSES jrSFS jrSGS  jrSGS! jr SH     SIS" jjrSJS# jrSHSKS$ jjrSLS% jrSMS& jrSNS' jr \!SOS( j5       r"\#        SPS) j5       r$\#SQS* j5       r%\#SRS+ j5       r&\#SRS, j5       r'\#   SS         STS- jj5       r(\# SU       SVS. jj5       r)\# SU       SWS/ jj5       r*\#SRS0 j5       r+\#SRS1 j5       r,\#SRS2 j5       r-\#SRS3 j5       r.SGS4 jr/      SXS5 jr0SHSYS6 jjr1SZS7 jr2S[S8 jr3SUS\S9 jjr4SNS: jr5SJS; jr6SJS< jr7SJS= jr8S]S> jr9S^S? jr:\;" S 5       SU   S_S@ jj5       r<SNSA jr=\=r>SBr?U =r@$ )`r  i>  z
A class that represents Kernels which are not directly lowered to Inductor
Loop Level IR, such as custom operators, or aten operators which we fallback to.
r   r0	  constant_argsr  r  r   NReinterpretView | Noneoutput_viewr  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernel_OpOverloads | Noneop_overloadzlist[dict[str, Any]] | Nonearg_propertieszdict[str, dict[str, Any]]allarg_propertiesz dict[str, dict[str, Any]] | Nonekwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]r	  c                @  > [         TU ]  UUUS9  X@l        U(       a  UO0 U l        X`l        Xl        U R                  U5        U R                  U5        Xl        U R                  5         0 U l
        / U l        [        R                  R                  U l        0 U l        g NrP
  )r  r  rn
  r   rp
  rv
  set_cpp_kernel_nameset_python_kernel_namert
  collect_arg_kwarg_propertiesrz
  r	  rs   r  rR
  fx_noder   )r  r   rU  rq  rn
  r   rp
  rq
  rr
  rt
  rv
  r  s              r   r  ExternKernel.__init__Z  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww+++-r   c                     U /U R                   Q$ r   r{	  r  s    r   r  ExternKernel.get_outputsz  r}	  r   c                    [        5       $ r   r=   r  s    r   r  %ExternKernel.get_unbacked_symbol_defs}  r  r   c                   [        U R                  [        R                  R                  5      (       af  U R                  R
                  R                   Vs/ s H:  nUR                  (       a  M  UR                  UR                  UR                  S.PM<     snO.[        [        U R                  5      5       Vs/ s H  n0 PM     snU l        [        U R                  [        R                  R                  5      (       aS  U R                  R
                  R                   Vs0 s H'  nUR                  UR                  UR                  S._M)     snO0 U l        [        U R                  [        R                  R                  5      (       a  U R                   (       dR  U R                  R
                  R                   Vs/ s H!  oR                  (       d  M  UR                  PM#     snU l        U R                  R
                  R                   Vs/ s H  oR                  (       d  M  UPM     snU l        g / U l        g s  snf s  snf s  snf s  snf s  snf )N)r   r   r  )r   r  )r   rv
  r  _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   rq  rw
  rx
  rt
  schema_kwargs)r  r   r   s      r   r
  )ExternKernel.collect_arg_kwarg_properties  s    $**EJJ,A,ABB ))11;; <A||FFKK%&__
 < $C$4565"56 	$ $**EJJ,A,ABB ))11;;;A qOO;
  	 d&&

(=(=>>55$($4$4$<$<$F$F6$Fq,,FAFF$F62  ++33=="=a="D "$D? 76"s0   I/(I:I#.IIII+Ic                    [        U R                  [        5      (       a!  U R                  5         U R	                  5         g g r   )r   rU  r  apply_constraintrQ  r  s    r   r  ExternKernel.decide_layout  s0    dkk>22!!#  3r   c                    [        X5      u  p4U(       a  UR                  U5        U(       d  U R                  5       nU(       a  SSKJn  U" XSS9nUR                  X&5        g g )NrE   )'set_kernel_post_grad_provenance_tracingT)r  )rf   make_commenttry_get_kernel_namer  r
  write_provenance_debug_handle)r  wrapperr 
  
origin_str_detailed_origin_strr
  debug_handles          r   codegen_commentExternKernel.codegen_comment  s]     ,?t+M(
  ,224KFBTL 11+L r   c                    [         er   r  r  r
  s     r   codegenExternKernel.codegen  r  r   c                r   Xl         [        R                  R                  (       a3  [	        U R
                  [        R                  R                  5      (       d  g U R
                  nU R                   c  UR                  S:X  a  UR                  S:X  a  UR                  R                  S5      S   OUR                  R                  SS5      nSSKJn  UR!                  SUR                   30 5      n[#        S U 5       S	S
9nUS	:  a  U SU 3nSU S3U l         g UR$                  R&                  U l         g g )Natenr  .r   r   inductor_fallback_opszaten.c              3  l   #    U  H*  oR                  S 5      (       d  M  [        USS 5      v   M,     g7f)r  rE   N)
startswithr   )r   r  s     r   r   3ExternKernel.set_cpp_kernel_name.<locals>.<genexpr>  s'     KAc9JZS12ZZs   44rE   r  _vz
at::_ops::z::call)rr
  rs   r  cpp_wrapperr   rv
  r  r
  r
  	namespace_overloadnamer   r  replacetorchgen.aoti.fallback_opsr
  r  rM  r
  r   )r  rr
  r
  opnamer
  version_infolatest_versions          r   r}
   ExternKernel.set_cpp_kernel_name  s   .ww""*ejj33+
 +
 !!'6) ++y8 OO))#.q100c:  M4885@Q9RTVW!$KK" "A% &xr.)9:F)3F86'B$'-~~':':$7 (r   c                   Xl         Ub  g U R                  nUc  g [        U[        R                  R
                  5      (       a  SUR                   3U l         g UR                  R                  SS5       SUR                   3U l         g )Nztorch.ops.higher_order.._ops..ops.r
  )	rq
  rv
  r   r  r
  HigherOrderOperatorr   r   r
  )r  rq
  r
  s      r   r~
  #ExternKernel.set_python_kernel_name  s    "4)!!>

 > >??(??P&QD# $$,,Xw?@&//ARS #r   c                Z   SSK Jn  U R                  5       =n(       a  UR                  O[        R
                  R                  n[        R
                  R                  (       a  U R                  $ [        R
                  R                  (       a  [        [        R
                  R                  U5      (       d(   [        [        R
                  R                  5      5       eU R                  c  g [        R
                  R                  R                  U R                  U5      $ U R                  $ )NrE   )CppWrapperCpu)codegen.cpp_wrapper_cpur
  r  r   rs   r  device_type
fx_wrapperrq
  r
  r   r  rr
  get_c_shim_func_name)r  r
  dr  s       r   r
   ExternKernel.try_get_kernel_name  s    :!%!22A29L9L77***WW  agg22MBB D$$E B ##+77''<<$$f  ***r   c                0    U R                  5       nUc   eU$ r   )r
  r  s     r   get_kernel_nameExternKernel.get_kernel_name  s!    '')r   c           	         [         R                  U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       S9nUR                  5         U$ )N)r  r
  r
  r  r  r  )	ru  r  r  r  r-  r  r  r  r  )r   ri
  s     r   
copy_inputExternKernel.copy_input	  sa    <<>++-]]_::<))+oo'  
 	

	r   c                  ^^ X#S.n[         R                  " U5      u  nm/ m/ n/ n/ nU GH  n	U	=[        S` =bj  u     [        R                  R
                  R                  R                  U	SS9n
TR                  S5        UR                  U
5        UR                  U
5        Mz    =[        S` =b  u     TR                  S5        UR                  U	5        U	R                  R                  nU	R                  R                  S:X  a  Uc   eUR                  [        R                  R                  U   R!                  5       5        GM    =["        S` =bC  u     TR                  S5        UR                  U	5        UR                  U	R$                  5        GMl    [&        S` =b'  u   TR                  S5        UR                  U	5        GM     TR                  S5        UR                  U	5        UR                  U	5        GM           SUU4S jjnU Vs/ s H  oR)                  U5      PM     nnU H  n[+        U5      (       d  M  [-        USS	9  M!     / nU GH'  n[/        U[0        5      (       dh  UR3                  5       [        R                  R4                  ;   a<  UR                  [        R                  R4                  UR3                  5          5        M  [/        U[0        5      (       dh  UR3                  5       [        R                  R6                  ;   a<  UR                  [        R                  R6                  UR3                  5          5        M  [/        U[8        5      (       a"  UR                  UR;                  5       5        GM5  [/        U[<        5      (       a  UR                  UR>                  5        GMh  [/        U[        R@                  RB                  R                  5      (       ar  UR                  R                  nUR                  R                  S:X  a  Uc   eUR                  [        R                  R                  U   R!                  5       5        GM  UR                  [E        U5      5        GM*     U" X5      u  nnU" U0 UD6nSn[        RF                  R                  =n(       a  [        RH                  RJ                  RM                  S
5      n[O        5       n[        RH                  RP                  [        RR                  RT                  RV                  L a  US   n[Y        [        RH                  5      nU   [[        U[        RH                  U5        SSS5        []        UUU5      n[/        U[^        [`        45      (       d  U/OUnU H  n[/        U[        Rb                  5      (       d  M$  URd                  (       d  M7  [f        Rh                  (       a  MN  Sn[        R                  RH                  RJ                  RM                  SS5      =n(       a  U SU 3nU[        R                  l5        M     UUUUU4$ s  snf ! , (       d  f       N= f)a]  Partition kernel args into tensor and non-tensor, realize tensor inputs,
re-run fake tensor propagation with the realized strides, and return
(example_output, tensor_args, non_tensor_args, unflatten_args, unbacked_bindings).

unflatten_args(new_tensor_args, new_non_tensor_args) reconstructs the
original (args, kwargs) tree from replacement lists.
)r   r   r   N)r|  Fr'  Tc                6  > / n[        U 5      n[        U5      nT H@  nU(       a  UR                  [        U5      5        M&  UR                  [        U5      5        MB     [        R                  " UT5      nUR                  S/ 5      UR                  S0 5      4$ )Nr   r   )rm  r  rl  pytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr3  args_flat_is_tensor	args_specs	          r   unflatten_args3ExternKernel.process_kernel.<locals>.unflatten_argsU  s     Fo.J!"56N0	MM$z"23MM$~"67	 1
 %%fi8A55$aeeHb&999r   r  rY  rE   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )r
  r   r
  r   r   ztuple[list[_T], dict[str, _T]])6r
  tree_flattenr   rs   r  r  r   create_symintnoder  GeneratorStater  r   r   r  r'  default_generatorsclone_stateOpaqueObjectStater  r   rS	  r  rV  r   rv  rz  	constantstorchbind_constantsr3
  	get_valuer   opaque_example_valuer  irr  r9  rR
  ri  r  r
   r	  _higher_order_opseffectswith_effectsr1   r8   r2   r   r   r[	  	is_sparserF   graph_partitiondisable_cudagraphs_reason)r1  r
  r   r   binded_args	args_flattensor_argsnon_tensor_argsreal_non_tensor_argsrN
  r   device_indexr
  r   example_argsnew_args
new_kwargsexample_outputrz
  r   node_meta_valctxexample_out_lir  msgr  r
  r
  s                             @@r   process_kernelExternKernel.process_kernel  s   "  $6%22;?	9*,$&(*  	 CTV77++55GGRVGWD'..u5#**40(//5	  &^%'..u5#**3/#&::#3#3L::??f49QQQ(//

55lCOOQ & )&('..u5#**3/(//		: )
 X'..t4&&s+  '..u5#**3/(//4; >	:)	:@L	:+	: 	: 6AA[((+[A A$Q''%a5   	 A a**qzz|qww?P?P/P##AGG$5$5ajjl$CDq(++JJLAGG$?$??##AGG$?$?

$MNA//##AKKM2A011##A$:$:;Au11@@AA xx~~xx}}.<3KKK##JJ11,?KKM ##$5a$89+ .  .lQ*8Z8GK---9-NN//33E:M0;C~~$$(?(?(G(G(T(TT -a 0<Q^^L	1>>>J  9>=! ntUm<<  	  A1ell++KKK...]"#''"6"6";";"?"?t"TT;T E!2;-@C471   
 	
Y Bj s   YY
Y&c                H   [        U[        5      (       d   [        U5      5       e[        U[        5      (       a  U$ UR	                  5       n[
        R                  R                  UR                  5       5      nUc   eUR                  5       nUb  SUR                  ;   a  [        U[        [        [        45      (       a  [        UR                  [        5      (       a}  [        UR                  S   [         R"                  S9(       d*  [        UR                  S   [         R$                  S9(       a)  UR'                  [)        UR+                  5       5      5        OUR-                  5         [.        R0                  " UR+                  5       SS9u  pVUS   nUR3                  5       " U5      n[
        R                  R4                  R7                  X5      n[
        R                  R4                  R9                  X5      n	[
        R                  R4                  R;                  X5      n
[=        Xy5      U
-   nX:w  a  [>        RA                  SU	U
U5        [B        e[        URD                  [G        URI                  5       URK                  5       UR+                  5       U	U
SS9S	9$ )
z
In order to pass this to an extern kernel we need a
ReinterpretView not a View.  This allows us to avoid some
unneeded copies.
rY  rL
  r3  r   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFr  rS  )&r   rv  r   rZ  rw  rs   r  r  rz  r  ri  ry  r  rU  r  r-   r  r  r  r_  r/   r  rQ  rG   r(  r1  r  r)  stride_vars
offset_varrk   r'  r  r  rT  rW  r$  r  )r1  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr  r  r   r\  rX  expecteds               r   convert_to_reinterpret_view(ExternKernel.convert_to_reinterpret_view  s"    !X&&/Q/&a))H gg  !7!7!9: # 3 3 5 "-.333=?FJ*OPP=//@@8)..u5"'"5"5 <)..u5"'"8"8 77.}/E/E/GH '')!-!@!@JJL"

  ]
 ,  55eH''""..uA!!,,U?Z1F:IIR	 &%,,.kkmZZ\

 
	
r   c           	     j   Uc
  [        5       $ [        U[        [        R                  R
                  R                  [        45      (       a	  [        US9$ [        U[        5      (       am  [        5          [        R                  R                  [        R                  " UR                   UR#                  5       UR%                  5       S95      sS S S 5        $ [        U[&        5      (       a  U$ [        U[(        5      (       a  U R+                  UR,                  5      $ [        U[.        5      (       a1  [/        U R+                  UR,                  5      UR1                  5       S9$ [        U[2        5      (       a@  UR5                  5         [7        UR9                  5       5      (       a   U R;                  U5      $ [        U[>        5      (       a  UR5                  5         U$ [        U[@        [        [B        45      (       a  U$ U RE                  U5      $ ! , (       d  f       GNK= f! [<         a     Nxf = f)N)rT  )r
  r  rS  )#ru  r   r   r   r   r   r   r   r   r  r?   rs   r  add_tensor_constantr  r[  r  r  r  r  r   rS	  rT  rZ  r  rv  r  r  rw  r
  r  rx  NonTensorObjr   r
  r4
  s     r   rS	  ExternKernel.realize_input  s   9'))a$ 3 3 ; ;SABB(a00a"" ()ww22LLallnU *) a((Ha##$$QVV,,a))"&&qvv.q||~  a""IIK$Q]]_55::1== a$$IIKHa,(=?PQRRH~~a  3 *)" + s   4AH(H% 
H"%
H21H2c                    [        U5      (       a@  [        UR                  5       5      S:X  a  U$ UR                  5        H  nUS:X  d  M  Us  $    U R                  U5      $ rT  )r  r   r:  r
  )r1  r   r	  s      r   require_stride1ExternKernel.require_stride1#  sR     ##1<<>"a',,.Q;H ) ~~a  r   c                
   Uc  Uc   eUR                  5       S;   a	  U(       d  U$ [        U5      (       Ga/  [        UR                  5       [        5      (       a  U(       a  [        X5      =(       a(    [        UR                  5       R                  5      (       + n[        USSU(       aJ  [        [        R                  R                  R                  UR                  5       R                  5      5      OUUS9  U$ [        USSS UUS9  U$ [        UR                  5       [        [        45      (       ay  U(       a$  UR                  5       R!                  U5      (       d>  U(       aG  [#        X1R                  5       R                  UR%                  5       5      (       a  Ub  ['        X5      $ U$ [        UR                  5       =n[(        5      (       a  [        UR+                  5       =n[        5      (       a  [-        S5      e[        U[        5      (       aO  U(       a  UR!                  U5      (       d0  U(       a+  [#        X7R                  UR%                  5       5      (       a  U$ [        U[.        5      (       ak  U(       a$  UR                  5       R!                  U5      (       d>  U(       a9  [#        X1R                  5       R                  UR%                  5       5      (       a  U$ [        U[0        5      (       a  [        UR2                  [4        5      (       a  [        UR2                  [6        5      (       d  [        UR9                  5       =n5      (       a  [;        US5      (       ao  [        UR2                  [<        5      (       dP   U R?                  UR2                  5      Ul        U(       a  U RA                  XUS9$ U(       a  U RC                  XUS9$  S n	UR%                  5       n
Ub  [        R                  R                  n[G        [I        UR%                  5       5      5       Vs/ s HJ  nURK                  X<   S	5      (       d  M  URM                  UR%                  5       U   S
5      (       d  MH  UPML     n	nU	 H.  n[N        RP                  RR                  RU                  XS	S5      nM0     U RW                  U5      n[        USSUUUS9  U(       a  [        X5      (       d   e U$ U	(       a<  U
b  Uc   e[N        RP                  RR                  RY                  X5      n['        X5      $ U$ ! [D         a     GNgf = fs  snf )N)r   rE   TF)r  r  r  rU  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrT  r  r   r   rE   )-r  r  r   r  r  r  r4   r	  rV  r  rs   r  r  r  rW  r  r  rK  r  ra  r  r  r(  r  r   rT  rv  rZ  rw  r  rd
  r
  require_stride_orderrequire_exact_stridesr  r   r   rO  r  r  r  loweringslice_r
  rL  )r1  r   r   rc  rU  use_current_stride_ordermutation_layoutr  rw  expanded_dims	orig_sizer  r   rH  s                 r   require_stridesExternKernel.require_strides-  s     M$===;;=F"=H !##!,,..99 0R0 0K3ALLN4I4IJJ - *#(-  8 - ! 0 0 H H$%LLN$9$9!" "'&3 H *#(-%)&3&3 HALLN[/,JKK1<<>;;EBB!1%||~'<'<ajjl  %0 4AE 
 $%LLN25O  $3$?$?$AA[N  )b   [99{<<UCC%5)+=+=qzz| 
 H a%%q||~77>>-!<<>#8#8!**, 
 Hq)$$1668,,qvv77%Q]]_&DkEEV,,{//1BCC88@33 4   #44 5   # +/JJL	$ww''H s1::<011A33M4DaH  11!**,q/1E 1   %OO,,33AAqA %
 NN1!''	
 5a????  (]-FFF((//=A21DDW ' s*   
6T3 T3 ,U&U7U3
U Uc           
         U R                  UU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R
                  OUPM<     snUS9$ s  snf )N)rc  rU  )r  r   r  SymIntr   rT  )r1  r   rc  rU  r   s        r   r  "ExternKernel.require_exact_strides  s]     ""KXKXaz!U\\::A= ( # 
 	
s   AA
c                "    U R                  XUS9$ )N)r   rU  )r  )r1  r   r   rU  s       r   r  !ExternKernel.require_stride_order  s     ""1"OOr   c                .    U R                  U[        5      $ r   )r  r  r4
  s     r   require_channels_last"ExternKernel.require_channels_last  s    ''+<==r   c                .    U R                  U[        5      $ r   )r  r  r4
  s     r   require_channels_last_3d%ExternKernel.require_channels_last_3d  s    ''+=>>r   c                    SS jnU" U5      (       a  U$ U R                  U[        R                  UR                  5       5      5      $ )Nc                     U R                  5       nU[        R                  R
                  ;   =(       a'    [        R                  R
                  U   R                  $ ! [        [        4 a     gf = fr'  )rz  AttributeErrorr  rs   r  r
  	is_mkldnn)r   r   s     r   is_mkldnn_tensor9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor  s]    zz| 177,,,R1B1B41H1R1RR #$78 s   A A0/A0r   r   r   r   r  r  r  r  )r1  r   r  s      r   rF  ExternKernel.require_contiguous  sC    	S AH,,>44QZZ\B r   c                h    U R                  U[        R                  UR                  5       5      5      $ r   r!  r4
  s     r   require_contiguous_strides'ExternKernel.require_contiguous_strides  s-     ((~00>
 	
r   c                    g r   r   r  s    r   r
  ExternKernel.apply_constraint  rJ  r   c                   [        U[        5      (       d   [        U5      5       e[        U[        5      (       d  [        U5      nU R                  (       d   S5       e[        U5      n[        U R                  5      nX4:  aq  [        R                  SU R                  XC-
  5        [        X45       H?  nU R                  U   S   nUR                  Xb;   a  X&   OU R                  U   S   5        MA     U$ )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )r   r   r   r   rw
  r   r'  r  rv
  r   r  )r  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_args#ExternKernel.fill_non_provided_args  s     $))54:5)$%%:D""U$UU"T,,-
 II^  #	 6...q1&9) $,,Q/@ / r   c                   [         R                  R                  (       Ga`  / nS nU(       ae  U R                  (       aT  [	        U R
                  5      [	        U5      :X  d   S5       eU R                   Vs0 s H  oDR                  S5      U_M     nn[        U R
                  5       H  u  pVUb3  Uc   eUR                  X   5      nU(       a  UR                  S5      OS nOb[	        U R                  5      U-   n	U R                  (       a7  U	[	        U R                  5      :  a  U R                  U	   R                  S5      OS nUR                  [         R                  R                  R                  Xh5      5        M     U$ U R
                   V
s/ s H,  n
[         R                  R                  R                  U
5      PM.     sn
$ s  snf s  sn
f )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rs   r  r
  rw
  r   rn
  r  r   rq  r  r  val_to_arg_str)r  r  r  name_to_arg_propertiesrN
  r   r   proptype_r   r  s              r   codegen_const_argsExternKernel.codegen_const_args,  s   77F
 &*",,4--.#e*< Z< 594G4G*4GSGGFOS(4G ' * "$"4"45)5 ,,,155eh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!KL 6 MDHDVDVWDVqAGG((77:DVWW'*& Xs   4G3G
c                    [         R                  R                  (       aD  U R                  b7  U R	                  / U R
                  QU R                  QU R                  5      nSnOU R
                  nSn/ n[        U5       H  u  pE[         R                  R                  (       a  U R                  (       a  U[        U R                  5      :  d   S5       eU R                  U   R                  S5      nUR                  [         R                  R                  R                  XV5      5        M  UR                  [         R                  R                  R                  U5      5        M     U(       a  UR                  U R!                  5       5        U$ )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rs   r  r
  rv
  r,  rq  rn
  r   r   rw
  r   r  r  r  r/  r  r3  )r  rq  need_codegen_constant_argsr   r   r   r2  s          r   codegen_argsExternKernel.codegen_argsM  s*   774#3#3#?003$++3 2 23T[[F */&[[F)-&f%DAww""**q3t7J7J3K/K CK ++A.226:AGG00??IJAGG00??BC & &KK//12r   c                    X;   a  UR                  U5      $ XR                  ;   a  U R                  R                  U5      $ U R                  R                  U5      =nb  UR                  S5      $ [        U S35      e)zGiven an argument name, queries for values in (in order):
1. any provided kwargs for this function.
2. the class self.kwargs member.
3. any available default arguments in self.allarg_properties.r  z not in self.allarg_properties)r  r   rx
  r(  )r  r+  r   rN
  s       r   get_kwargs_valueExternKernel.get_kwargs_valueg  st    
 ::h''{{";;??8,,))--h77CD77?++z)GHIIr   c           	        [         R                  R                  (       a  U R                  b  [	        U R
                  5      S:X  a  / $ / nU R                   H  nU(       a  US:X  a  M  U R                  U5      n[        U[        5      (       a  UR                  U5        MK  U R                  c   eU R                  R                  U0 5      R                  S5      nUR                  [         R                  R                  R                  XE5      5        M     U$ U R                  R!                  5        VVs/ s H3  u  pdU S[         R                  R                  R                  U5       3PM5     nnnU$ s  snnf )Nr   r   r   r  )rs   r  r
  rv
  r   r
  rt
  r:  r   r   r  rx
  r  r  r/  r   r  )r  skip_outr   r+  r  r2  ks          r   codegen_kwargsExternKernel.codegen_kwargst  s8   77+D4F4F0G10L	F >>E 1))(3a&&MM!$11=== 2266xDHHPEMM!''"6"6"E"Ea"OP ?"  !KK--//DA #Qqww++::1=>?/   	s   6:E5c                    U R                   bS  U R                   R                  n[        USS5      nUR                  SS5      nUR	                  SS5      S   nU SU 3nU$ SnU$ )	Nr   unknown_namespacer
  r
  r
  rE   r   
unknown_op)r
  r	  r   r
  rsplit)r  r	  op_namespaceop_names       r   get_op_nameExternKernel.get_op_name  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Gr   c                   [         R                  (       a  [        R                  R                  (       d  [        U R                  5       5      S:X  a  g [        R                  R                  R                  U R                  5       5      n[        R                  R                  R                  U R                  5       5      nU R                  5       nUR                  SU R                  5        SU SU SU< S3	5        g g g )Nr   zassert_size_stride(r  r"  )rF   size_assertsrs   r  r
  rn   r  r  codegen_shape_tupler:  rG  r  rz  )r  r
  r  r	  rF  s        r   codegen_size_asserts!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;r   c           	     j   [         R                  (       a  [        R                  R                  (       d~  U R                  5       nU[        R                  R                  ;  nU R                  5       nU(       a!  UR                  SU S[         SU< S35        g UR                  SU SU S35        g g g )Nzassert_alignment(r  r"  z	# buffer z (op: z) is assumed to be not aligned)
rF   alignment_assertsrs   r  r
  rz  r  rG  r  rg   )r  r
  r   alignedrF  s        r   codegen_alignment_asserts&ExternKernel.codegen_alignment_asserts  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#r   c                    [         R                  R                  (       a  [        R                  R
                  (       a  gUR                  5         U R                  5       nUR                  SU SU S35        g)zS
Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
Nztrack_tensor(z, 'z'))	rF   test_configstrack_memory_lifecyclers   r  r
  "write_memory_track_allocation_oncerz  r  )r  r
  r   s      r   codegen_memory_tracking$ExternKernel.codegen_memory_tracking  sV     ""99QWW=P=P224}}M$s4&;<r   c                N    U R                  5       nU R                  5       nU/ /U4$ )z4
get output sizes and strides, for template_codegen
)r  r:  )r  r  r  s      r   get_group_strideExternKernel.get_group_stride  s*     //#r{G##r   c                   [         R                  R                  nU R                  5       nU R	                  5       nU Vs/ s H  oAR                  U5      PM     nn[        [        U5      5       Vs/ s H  n[        SU 35      PM     nn[        [        [        U5      5      UR                  SS9n[        U5       VV	s0 s H  u  pX_M	     n
nn	[        [        U
5      5       Vs/ s H  oZU   PM	     nnU Vs/ s H  oVU   PM	     nnU R                  5       nU" U5      n[         R                  R                  R                  XbU/5      u  pn[        S5      u  nn[        [!        Xo" U Vs/ s H  nU" U5      PM     sn5      5      5      n[#        [$        R&                  " U5      U5      nU[)        U5      4$ s  snf s  snf s  sn	nf s  snf s  snf s  snf )z3
Manually get canonicalization of the output index
r
  T)r  rr  c)rs   r  r  r  r:  r  r   r   rl   r  r  r   r1  r  rR   r   r   ro   r   rL  r   )r  r  r  r\  r   r   r  index_orderr   r   r   r   r  r   	new_sizesr   r  r   add_varreplacements                       r   canonicalizeExternKernel.canonicalize  s   
 77##//# ;BB'Q--a0'B;@U;LM;La(1QC1;L
MU3w<0g6I6ISWX+4[+AB+Axs#(+AB$)#f+$67$6q$67-23UmU
3##%
#%&WW%5%5%E%Ew&
"	F !%
73z7	3R	1GAJ	3R+STU5<<.<eI&&&+ CM C73 4Ss#   F=1G G'G;G/Gc                    U(       a  [         O[        n[        R                  X5      nU R                   H  nX2" U5      -  nM     U R
                  R                  5        H  nX2" U5      -  nM     U$ r   )maybe_free_unbacked_symbolsmaybe_free_symbolsr  rv  rn
  r   r   )r  rp  maybe_get_symbolsr3  rN
  s        r   rv  !ExternKernel.get_free_symbol_uses  sp     ,9'>P 	 --dB%%C"3''A &;;%%'C"3''A (r   c           
     ,   [        U SS 5      nSU< 3/nU[        R                  " U 5       Vs/ s H'  nUR                   S[        XR                  5       3PM)     sn-  nUR	                  SU R
                  < 35        U R                  U5      $ s  snf )Nrq
  zpython_kernel_name=r  r  )r   r  fieldsr   r  r  r  )r  r 
  r  r  s       r   r!  ExternKernel.__str__  s    d$8$?!+1
 	$++D1
1 zzl!GD**5671
 	
 	|D$4$4#789u%%
s   .B)rx
  r   rw
  rn
  rr
  r
  r   r	  rv
  rt
  rp
  rq
  r
  rz
  r   NNNNr   N)r   r  rU  r#  rq  r#
  rn
  r0	  r   dict[str, Any] | Nonerp
  ro
  rq
  r  rr
  r  rt
  rs
  rv
  ru
  r   r   r  r  r  r   )r
  rx   r 
  r  r   r   r
  rx   r   r   rr
  r  r   r   )rq
  r  r   r   r  r  )r   r   r   r   )r
  r   r   r   r   r   r   zftuple[Any, list[Any], list[Any], Callable[[Any, Any], Any], dict[sympy.Symbol, pytree.KeyPath] | None])r   r   r   rZ  rO  )NNF)
r   r   r   Sequence[int] | Nonerc  r  rU  r   r   r   r  )r   r   rc  r  rU  r   r   r   )r   r   r   r  rU  r   r   r   )r   r0	  r   r  r   r0	  )r  r  r   r   r   r   )r+  r   r   r   r   r   )r=  r   r   r   )r   z'tuple[list[Sequence[Expr]], list[Expr]])r   ztuple[Expr, Sequence[Expr]]r  )Ar   r   r   r   r  rn
  r   r  r  r   r   rp
  rq
  rr
  r   rt
  rv
  rw
  rx
  ry
  rz
  r	  r  r  r  r
  r  r
  r
  r}
  r~
  r
  r
  r  r
  rl  r
  r
  rS	  r  r  r  r  r  r  rF  r$  r
  r,  r3  r7  r:  r?  rG  rL  rQ  rW  rZ  rb  r^   rv  r!  rk  r   r  r  s   @r   r  r  >  s   
 $&M=%(..tDFND*.K'.%)
)"&OZ& 4?3D3D4!=  (,K$+26N/63>3D3D40  :>6=<G<M<M=9  .9->->t-T*T (*(,.2)-&*79+/.. . 4	.
 %. &. ,. '. $. (5. ). 
. .@.#$J! HLM+M:DM	M""#;J+$
 
 
 ^
!^
*-^
9<^

^
 ^
@ E
 E
N !! !!F ! !  '+37#aa $a 1	a
 a 
a aF QV	
	
'9	
JN	
		
 	
 DIPP,P=AP	P P
 > > ? ?  " 
 
"!"+9"	"HXB4J4	
	=$'B N+$)!	! ,
& Hr   r  c                  x   ^  \ rS rSrSS jr       S                   SU 4S jjjrS	S jrSrU =r$ )
ExternKernelOuti  c                &    UR                  U 5        g r   )generate_extern_kernel_outr
  s     r   r
  ExternKernelOut.codegen	      **40r   c
                :  > U R                  U5      n
[        U
[        5      (       d   [        U
5      5       e[        TU ]  S UU
UU=(       d    0 S UUUU	5
        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r   )r	  r   r   r   r  r  rs   r  r	  r   r	  )r  rU  rq  rn
  r   rp
  rq
  rr
  rt
  rv
  unwrapped_inputsr  s              r   r  ExternKernelOut.__init__  s      ..v6*H55Mt<L7MM5Lb)	
 GG++D1		""4(r   c                    gr  r   r  s    r   rZ  ExternKernelOut.should_allocate)  r  r   r  rn  rl  )rU  r  rq  rm	  rn
  r0	  r   rm  rp
  ro
  rq
  r  rr
  r  rt
  r0	  rv
  ru
  r   r   r  )	r   r   r   r   r
  r  rZ  r   r  r  s   @r   rs  rs    s    1 (*(,.2)-&*79+/)) !) %	)
 &) ,) ') $) (5) )) 
) ): r   rs  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )RandomSeedsi-  c           	       > [         R                  " [         R                  5      n[        TU ]  [        U[         R                  U/S9/ UR                  UR                  U//SS[        R                  R                  S9  g )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)rU  rq  rn
  rq
  rr
  rv
  )r  r  rN  r  r  rW  r  rM  r
  randintlow_out)r  ri	  r  limitsr  s       r   r  RandomSeeds.__init__.  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   r   )ri	  r   r  r  r   r   r   r   r   r   r  r   r  r  s   @r   r~  r~  -  s    
 
r   r~  c                  |   ^  \ rS rSrSS jr      S                 S	U 4S jjjrS
S jrSS jrSrU =r	$ )rd
  iA  c                &    UR                  U 5        g r   )generate_extern_kernel_allocr
  s     r   r
  ExternKernelAlloc.codegenB  s    ,,T2r   c	                Z  > U R                  U5      n	[        S U	 5       5      (       d   e[        T
U ]  S U[	        [
        [           U	5      UU=(       d    0 S UUUU5
        / U l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r
  )r   r   s     r   r   -ExternKernelAlloc.__init__.<locals>.<genexpr>Q  s     C2BQ:a((2Br  )r	  r   r  r  r   r   r   r	  rs   r  r	  r   r	  )r  rU  rq  rn
  r   rq
  rr
  rt
  rv
  ry  r  s             r   r  ExternKernelAlloc.__init__E  s      ..v6C2BCCCCC&!#34Lb)	
 ')GG++D1		""4(r   c                    gr'  r   r  s    r   rZ  !ExternKernelAlloc.should_allocatee  r*  r   c                    [         er   r  r  s    r   r
  "ExternKernelAlloc.apply_constrainth  r  r   )r   r	  rn  )r   NNNr   N)rU  r#  rq  rm	  rn
  r0	  r   rm  rq
  r  rr
  r  rt
  r0	  rv
  ru
  r   r   r  r  )
r   r   r   r   r
  r  rZ  r
  r   r  r  s   @r   rd
  rd
  A  s    3 (*(,)-&*79+/)) !) %	)
 &) ') $) (5) )) 
) )@" "r   rd
  c                  h   ^  \ rS rSrSr        S	U 4S jjrS
S jrSS jrSS jrSS jr	Sr
U =r$ )r	  il  zH
An output buffer that represents the mutation of a pre-existing buffer
c                   > [         TU ]  S US9  UR                  5       n[        R                  R                  U5        U/U l        X0l        [        R                  R                  U 5      U l	        g rq  )
r  r  rz  rs   r  r  mutation_namesmutating_noder	  r   )r  rU  mutated_noder  mutated_node_namer  s        r   r  MutationOutput.__init__q  s`     	d62(113	##$5601(5GG++D1	r   c                    U R                   $ r   )r  r  s    r   r  MutationOutput.get_defining_op{  s    !!!r   c                    U R                   $ r   )r  r  s    r   r  !MutationOutput.get_mutation_names~  r  r   c                    gr'  r   r  s    r   rZ  MutationOutput.should_allocate  r*  r   c                j    U R                  5       nS U 5        Vs/ s H
  nUc  M  UPM     sn$ s  snf )Nc              3  `   #    U  H$  n[         R                  R                  U5      v   M&     g 7fr   )rs   r  try_get_buffer)r   r   s     r   r   6MutationOutput.get_mutation_buffers.<locals>.<genexpr>  s"     P..t44s   ,.)r  )r  r  r  s      r   get_mutation_buffers#MutationOutput.get_mutation_buffers  s@    002 QP
P P
 	
 
s   00)r  r  r   )rU  r#  r  r   r  r  r   r   rd  r  r  r   rm	  )r   r   r   r   r  r  r  r  rZ  r  r   r  r  s   @r   r	  r	  l  sF    2 2062GP2	2"#
 
r   r	  c                     ^  \ rS rSr% Sr0 rS\S'   \      SS j5       r\      SS j5       r	        SU 4S jjr
SS jrSS	 jrS
rU =r$ )TMADescriptori  aL  
An IR node representing a generic host-side TMA descriptor in the Triton API
Mostly useful for user-defined Triton kernels relying on host-side TMA;
but can, in principle, be used for Inductor's Triton templates, too.

See TMADescriptorExperimental and TMADescriptorStable for the two implementations
(the old API and the new API)
zdict[Any, TMADescriptor]_CACHEc                    [        U5      S:X  d   eUS   S:X  a  [        U/US   Q76 $ US   S:X  d   e[        U/US   Q76 $ )Nr   r   experimentalrE   ri  )r   TMADescriptorExperimentalTMADescriptorStable)r1  r[  tma_metas      r   _create_implTMADescriptor._create_impl  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<r   c                    [        U5      U4nX0R                  ;  a  U R                  X5      U R                  U'   U R                  U   $ r   )r\	  r  r  )r1  r[  r  r  s       r   r  TMADescriptor.create  sB     &z8$jj !..v@CJJsOzz#r   c           
     8  > [         TU ]  S [        [        UUR	                  5       S95      [        [        [           U5      [        U5      S 5        Xl	        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )NrS  )r  r  r  rZ  r  r   r   ry  r   r[  rs   r  r	  r   r	  )r  r[  rq  rn
  r  s       r   r  TMADescriptor.__init__  s     	 !,,. &!6*- 	
  GG++D1		""4(r   c                &    UR                  U 5        g r   )generate_tma_descriptorr
  s     r   r
  TMADescriptor.codegen      ''-r   c                    U R                   $ r   )r[  r  s    r   
get_tensorTMADescriptor.get_tensor  r'  r   )r   r[  )r[  r   r  ztuple[str, tuple[Any, ...]]r   r  )r[  r   rq  r0	  rn
  r0	  r   r   rn  r  )r   r   r   r   r  r  r   rl  r  r  r  r
  r  r   r  r  s   @r   r  r    s     (*F$)=='B=	= = 'B	 ))&3)DQ)	).. r   r  c                  H   ^  \ rS rSrSr S         SU 4S jjjrSrU =r$ )r  i  z
the new host-side TMA Descriptor API:
(the ones obtained via create_{1d,2d}_tma_descriptor calls).

See also TMADescriptorStable for the new API.
c                ^  > [        U5      S;   d   e[        U5      [        U5      :X  d   eUc  UR                  5       R                  nX l        X0l        X@l        [        U R                  5      U l        U/n/ U R                  QU R                  QU R
                  Pn[        TU ]!  UUUS9  g )N)rE   r   r[  rq  rn
  )	r   r  r  r  
block_dimselement_sizerm  r  r  )r  r[  r  r  r  rq  rn
  r  s          r   r  "TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
r   )r  r  r  rm  r   )
r[  r   r  list[int | torch.SymInt]r  r  r  r  r   r   r   r   r   r   r  r  r   r  r  s   @r   r  r    sG     $(

 '
 -	

 !
 

 
r   r  c                  0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )r  i  z
the new host-side TMA descriptor API
(the ones obtained via TensorDescriptor.from_tensor).

See also TMADescriptorExperimental for the old API.
c                2   > X l         [        TU ]	  UU/US9  g )Nr  )block_shaper  r  )r  r[  r  r  s      r   r  TMADescriptorStable.__init__  s&    &8% 	 	
r   )r  )r[  r   r  r  r  r  s   @r   r  r    s    
 
r   r  c                  R   ^  \ rS rSr S           SU 4S jjjrSS jrSrU =r$ )SubgraphBufferi  c                V  > [         TU ]  S X5        X0l        X@l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        [        R
                  R                  U R                  XE5      U l
        [        U R                  5      (       d   e[        U R                  5      nU HT  nXR                  R                  UR                  '   U R                  R                  R!                  UR                  5        MV     U V	s/ s H  oR                  PM     sn	U l        SS KJs  Jn
  [        R*                  " U R                  5         SSSS.n0 UEU=(       d    0 EnU
R-                  U5         U R                  R.                  " U R                  6   S S S 5        U(       a<  U R                  R0                   H"  nUR3                  UR5                  5       5        M$     S S S 5        g s  sn	f ! , (       d  f       N_= f! , (       d  f       g = f)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  r  rk  example_inputsrs   r  r	  r   r	  make_subgraphsubgraphr!
  rq  rt  r=  graph_input_namesr  
sym_inputstorch._inductor.configr  rF   set_graph_handlerr   run
operationsr  r  )r  rU  r   rk  r  subgraph_namer	  r  sym_inpsym_varinductor_configbase_patchesmerged_patchesre  r  s                 r   r  SubgraphBuffer.__init__  s    	v3,GG++D1		""4(--dgg~U,,,,(5
!G7>MM&&w||4MM++227<<@ " 8BBzG<<zB88  / !&%*.4L .X-WAUSU-WN &&~6!!4#6#67 7
 --22B)).*=*=*?@ 3 0/	 C 76 0/s+   H%'H$H	0AH	
H	H
H(c                $    " S S5      n[        U R                  5      (       d   eU R                   Vs/ s H  o3R                  5       PM     nnUR                  U" U R                  5      / U R
                  QUQU R                  /5        g s  snf )Nc                      \ rS rSrSS jrSrg),SubgraphBuffer.codegen.<locals>.CodegenGraphi7  c                2    Xl         UR                  U l        g r   r  r   )r  r  s     r   r  5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__8  s    "
!JJ	r   r  N)r  ry   )r   r   r   r   r  r   r   r   r   CodegenGraphr  7  s    'r   r  )r!
  rq  r  'codegen_subgraph_with_flattened_outputsr  r  r   )r  r
  r  r  outer_inputss        r   r
  SubgraphBuffer.codegen6  s|    	' 	'
  ,,,,7;{{C{!++-{C77'-doo--YYK	
 Ds   B)r  rk  r   r  r  r   )rU  r  r   r  rk  torch.fx.GraphModuler  	list[Any]r  r   r	  rm  rn  r   r   r   r   r  r
  r   r  r  s   @r   r  r    s_     15,A,A ",A !	,A
 ",A ,A .,A ,A\
 
r   r  c                    ^  \ rS rSrSrSS jrSS jr\SS j5       r      SS jr	      SS jr
\" S 5       S   SU 4S jjj5       rSS	 jr          SU 4S
 jjr\SU 4S jj5       rSS jrSS jrSrU =r$ )UserDefinedTritonKerneliF  z6
A user-defined triton kernel (e.g. via @triton.jit).
c                z  ^ SSK Jn  SSKJn  UR	                  U R
                  5      m/ n/ n/ n[        TU5      (       a  [        TS5      (       a&  UR                  U4S jTR                   5       5        O.[        TS5      (       d   eUR                  TR                  5        [        TS5      (       a<  TR                   H+  nUR                  TR                  R                  U   5        M-     O.[        TS5      (       d   eUR                  TR                  5        TR                   nTR                  mTX4U4$ )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  V   >#    U  H  nTR                   R                  U   v   M      g 7fr   )r   	arg_names)r   r   r
  s     r   r   BUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>X  s$      *4FqFII''*4Fs   &)restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr  *torch._higher_order_ops.triton_kernel_wrapr  
get_kernel
kernel_idxr   r  r  r  r  r  r  r   r  r  configs)r  r  r  r  restore_value_argsreset_to_zero_argsr   r
  s          @r   get_kernel_and_metadata/UserDefinedTritonKernel.get_kernel_and_metadataK  s   6P"--doo>(*(*fi(( v}--")) *4:4F4F*  v7777"))&*>*>?v{++))A&--fii.A.A!.DE * v7777"))&*>*>?nnGYYFw4FFFr   c                   [         R                  (       d  gU R                  R                  (       d  g[	        U R
                  R                  5      S:w  a  g[	        U R                  5      S:X  d   e[        U R                  S   [        5      (       d  g[        U R                  S   R                  [        5      (       d  g[        U R                  S   R                  R                  [        5      (       d  g[        U R                  S   R                  R                  R                  [        5      (       d  g[        S U R                  S   R                  R                  R                  R                   5       5      (       d  gg)a  
For kernels like

@triton.jit
def add_kernel(in_ptr0, in_ptr1, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    pid = tl.program_id(0)
    offs = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offs < n_elements
    x = tl.load(in_ptr0 + offs, mask=mask)
    y = tl.load(in_ptr1 + offs, mask=mask)
    tl.store(out_ptr + offs, x + y, mask=mask)

@torch.compile
def fn(a, b):
    out = torch.empty_like(a)
    grid = (triton.cdiv(a.numel(), 1024),)
    add_kernel[grid](a, b, out, a.numel(), BLOCK_SIZE=1024)
    return out.relu()

We can potentially fuse the relu epilogue into the add_kernel.
We do this by pruning the `out` tensor allocation and directly writing the relu-output.
FrE   r   c              3  *   #    U  H	  oS :H  v   M     g7fr4  r   r  s     r   r   <UserDefinedTritonKernel.can_fuse_epilogue.<locals>.<genexpr>  s     N#Ma6#Mr  T)rF   *epilogue_fusion_user_defined_triton_kernelarg_accessescan_fuse_epiloguer   kernel_storesstoresmutable_argsr   r   rT  rx  r  ru  r   r  r  s    r   r  )UserDefinedTritonKernel.can_fuse_epiloguek  s%   0 @@  22 t!!(()Q. 4$$%***$++A.	::$++A.33Z@@$++A.3388.II$++A.3388==yIIN4#4#4Q#7#<#<#A#A#F#F#M#MNNNr   c                "    U R                  US S9$ )N)epilogue_fusion_codegenr
  s     r   r
  UserDefinedTritonKernel.codegen  s    }}Wd};;r   c                $    U R                  X5      $ )zA
epilogue_fusion: (fused epilogue node, modified kerel src code)
r  )r  r
  r
  s      r   codegen_with_epilogue_fusion4UserDefinedTritonKernel.codegen_with_epilogue_fusion  s     }}W66r   c                T  ^ SSK Jn  U R                  5       u  nnnnUR                  UUU R                  UUU R
                  U5      u  nn	n
nU R                   Vs0 s H  oU R                  U5      _M     nnU(       a{  [        U R                  R                  R                  5      S:X  d   e[        [        U R                  R                  R                  5      5      R                  nX;   d   eUu  nnXU'   UR                   Vs/ s H  nUR                  PM     snmUR                   Vs/ s H"  nUR                   (       d  M  UR"                  PM$     nn[%        U4S jU 5       5      n/ n/ n/ n/ n[&        R(                  " UR+                  5       [-        [&        R.                  " S5      U5      5       GH  u  nnUU;   a  U" 5       (       a  M  UR1                  U5        UR1                  U5        [3        U[4        5      (       a@  UR1                  UR7                  5       5        UR1                  UR9                  5       5        M  [3        U[:        [<        [>        [@        RB                  45      (       a-  UR1                  U5        UR1                  [E        U5      5        M  UU;   a)  UR1                  S5        UR1                  [:        5        GM  UcY   U" 5       (       a)  UR1                  S5        UR1                  [:        5        GMV  URG                  5         URG                  5         GMy  [I        S[E        U5       S	U 35      e   U RK                  X5        URM                  UUUUUU	U
S
U RO                  5       U RP                  R                  S9
  gs  snf s  snf s  snf )QOverrides the parent member.
See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictrE   c              3  .   >#    U  H
  nTU   v   M     g 7fr   r   )r   r   r  s     r   r   3UserDefinedTritonKernel._codegen.<locals>.<genexpr>  s     $F:aYq\:s   r  r8  NzUnsupported arg type: r!  T)	arg_typesraw_argsraw_keystriton_metainductor_metar*  r  original_fxnode_name))r
  r  r  !define_user_defined_triton_kernelr   gridrt
  r:  r   r  r  r  rl  rm  r   paramsis_constexprnumr>   rF  rd  r  r   repeatr  r   r   r  r  r   rr  r   r   r   r   r0  r  r
  generate_kernel_callr  r
  )r  r
  r
  r  r
  r  r  r  new_namer  r  extra_launch_argsr>  
named_argsmutable_arg_nameepilogue_computed_bufferr   r  
constexprsconstexpr_namesr   r  raw_keys_filteredraw_args_filteredr   rN
  r  s                             @r   r   UserDefinedTritonKernel._codegen  s2    	I ((*	
 55KKII
	
 261S1S
1SAt$$Q''1S 	 
 t((44;;<AAA#D):):)F)F)M)M$NOTT#111*9'$a+C'(%+]]3]QVV]3	%+]]E]anneaee]
E$$F:$FF!	')')"I$4$4R$8:K L
ID# &+I+K+K$$T*$$S)#v&&C1134  1C#udEJJ!?@@C   c+( B  % 233KKO$$S)%))+%))+),B49+RPSu*UVVI
L 	W/$$&&#'??$!%!2!2 	% 	
u
 4Es   NN /N%N%c                P   > [         TU ]  U5      [        U R                  U5      -  $ r   )r  rv  r'   r  r  s     r   rv  ,UserDefinedTritonKernel.get_free_symbol_uses  s-     w+M:=MII}>
 
 	
r   c                    [        5       $ r   r=   r  s    r   r  0UserDefinedTritonKernel.get_unbacked_symbol_defs   r  r   c          	       > / n0 n/ nUR                  5        H  u  p[        U	[        5      (       aX  [        R	                  U R                  U	5      5      n
X;   a  [        R                  XU   5      n
UR                  U
5        XU'   Mr  UR                  U	5        XU'   M     [        U5      S:w  d   eUS   R                  5       U l        [        U[        5      (       d   [        U5      5       e[        TU ]=  S [!        U R                  S9U[#        U5      U5        Xl        X l        U R)                  5       u  p  n[+        US5      (       d   eUR,                   Vs/ s H  oU;   d  M
  UPM     snU l        SSKJnJn  [        U5      S:  a  US   R6                  O0 nSS KnUR:                  U l        UR?                  U R<                  5      U l         U" U R<                  5      U l!        X@l"        U" U0 UEUEU5      U l#        U RF                  RH                  RJ                   Vs/ s HB  n[        URM                  URN                  5      [        5      (       d  M3  UURN                     PMD     snU l(        U RP                   Vs/ s H!  n[S        [!        U R                  S9UU 5      PM#     snU l*        [V        RX                  R[                  U 5        g s  snf s  snf s  snf )Nr   r%  r  )identify_accessed_tensorsidentify_triton_stores).r  r   r   r  r2
  rS	  r  r  r  r   r  r  r   r   r  r  r  r   r  r  r  r  r  rt
  r  r2  r3  r   astr  
kernel_srcparse
kernel_astr  kernel_argsr  r  r  r  r   r  r	  r	  rs   r  r	  )r  r  r  tma_descriptor_metadatar8  rq  r   rn
  r>  r  r  r
  r  r   rN
  r2  r3  autotuned_kwargsr4  r  r  r  s                        r   r   UserDefinedTritonKernel.__init__#  s     "$&&(%%'DA!Y'' 99$:L:LQ:OP/%,,Q0JKAa q	$$Q'q	 ( 6{aQi**,&(++9T&\9+dkk*- 	
 %	 $ < < >A v{++++!++.
+Ck/AC+.
*	

 14Gq0@71:,,b !**))DOO43DOOD&5/{/./#
 ((44;;
;+//#((3Y? "K!;
 ((!
( :T[[93E(!
 	
""4(K.
6
!
s   	K,K)2KK(K"c                f  > [         R                  (       d  [        TU ]  5       $ U R                  R
                  R                   Vs0 s H5  nUR                  U R                  UR                     R                  5       _M7     nn[        U R                  R
                  R                  5      n[        X0R                  5       VVs0 s H   u  pUR                  UR                  5       _M"     nnn[        R                  " [!        U R                  R
                  R                   Vs/ s H  nUR#                  U5      PM     sn5      [!        U R                  R
                  R                   Vs/ s H  nUR#                  U5      PM     sn5      [!        5       S9nU$ s  snf s  snnf s  snf s  snf Nr  )rF   r  r  rg  r  r  rk  r   r8  rz  r   r  r   r	  rG   r  r>   rename)	r  formal_arg_depread_renamesformal_arg_writes
mut_outputwrite_renamesr  r  r  s	           r   rg  'UserDefinedTritonKernel.get_read_writess  s    @@7*,,
 #'"3"3"?"?"E"E
"E !1!1.2E2E!F!O!O!QQ"E 	 

 !!2!2!>!>!E!EF /2!#8#8/
/* !4!4!66/ 	 
 #--  $00<<BBB JJ|,B   $00<<CCC JJ}-C #
 9

s   <F'F#'F)/F.c                ,    [        U R                  5      $ r   )r   r	  r  s    r   r  #UserDefinedTritonKernel.get_outputs  s    D))**r   c                    U R                   $ r   r%  r  s    r   r  "UserDefinedTritonKernel.get_device  r'  r   )r  r  r  r8  r7  r  r5  r  r  r	  rt
  )r   z(tuple[Kernel, Any, list[str], list[str]]r  rn  )r
  rx   r
  ztuple[ComputedBuffer, str]r   r   )r
  rx   r
  z!tuple[ComputedBuffer, str] | Noner   r   r  r  r  )
r  r   r  r   r9  r  r8  r  r   r   r  r  r  )r   r   r   r   r  r  r  r   r
  r  r  r^   rv  r  r  rg  r  r  r   r  r  s   @r   r  r  F  s    G@6p < <7+7>X7	7e
%e
 ;e
 
	e
N 56$)
!
	!
 7
N) N) 	N)
 "0N) $N) 
N)` # #J+ r   r  c                  h   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr        SU 4S jjr	Sr
U =r$ )InplaceBernoulliFallbacki  =
This needs to be a custom class to handle mutation properly
c                    [        S U R                   5       5      (       d   eS U R                   5       u  n[        R                  R                  (       a\  UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r
  r   r  s     r   r   3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     >+Q:a((+r  c              3  ^   #    U  H#  n[        [        U5      R                  5       v   M%     g 7fr   )r   r   r  rN  s     r   r   rO    s"     I[VQ1133[s   +-r  r  z, NULL)r"  )r   rq  rs   r  r
  r  r
  r  r  reprrn
  ending)r  r
  r   s      r   r
   InplaceBernoulliFallback.codegen  s    >$++>>>>>IT[[I77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                    gr'  r   r  s    r   rZ  (InplaceBernoulliFallback.should_allocate  r*  r   c                &    U R                  S5      /$ r  r'
  r  s    r   r  +InplaceBernoulliFallback.get_mutation_names      "##r   c                    [        5       $ r   r=   r  s    r   r  1InplaceBernoulliFallback.get_unbacked_symbol_defs  r  r   c                R  > [         TU ]  S [        UR                  5       S9U R	                  U/5      UUS9  [
        R                  R                  UR                  5       5        [
        R                  R                  U 5      U l
        [
        R                  R                  U 5        g )Nr%  rv
  )r  r  r  r  r	  rs   r  r  rz  r	  r   r	  )r  rv
  r   rn
  r  s       r   r  !InplaceBernoulliFallback.__init__  s     	alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   r  rn  r  r  r  )rv
  r   r   r   rn
  r   r   r   r   r   r   r   r  r
  rZ  r  r  r  r   r  r  s   @r   rJ  rJ    sF    $)'),2)DG)	) )r   rJ  c                     ^  \ rS rSrSrS
S jrSS jrSS jrSS jr        SU 4S jjr	\
 S       SS jj5       rS	rU =r$ )InplaceCopyFallbacki  rK  c                N    U R                  5       u  p#nUR                  X2U5        g r   )r7  codegen_device_copy)r  r
  r  r  non_blockings        r   r
  InplaceCopyFallback.codegen  s%    #'#4#4#6 <##Cl;r   c                    gr'  r   r  s    r   rZ  #InplaceCopyFallback.should_allocate  r*  r   c                &    U R                  S5      /$ r  rW  r  s    r   r  &InplaceCopyFallback.get_mutation_names  rY  r   c                    [        5       $ r   r=   r  s    r   r  ,InplaceCopyFallback.get_unbacked_symbol_defs  r  r   c           	       > [         TU ]  S UUUSSS9  [        R                  R	                  US   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nz
aten.copy_aoti_torch_copy_)rq
  rr
  r   )	r  r  rs   r  r  rz  r	  r   r	  )r  rU  rq  rn
  r  s       r   r  InplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    X4 Vs/ s H  o@R                  U5      PM     nnU4n[        [        UR                  5       S9UU5      nU$ s  snf r~  )rS	  ra  r  r  )r1  r  r  rd  r  rq  rn
  r  s           r   r  InplaceCopyFallback.create  sU     25
;
1##A&
;%$cnn./

  <s   A	r  rn  r  r  r  )rU  r#  rq  rm	  rn
  r0	  r   r   r  )r  r   r  r   rd  r   r   ra  )r   r   r   r   r  r
  rZ  r  r  r  rl  r  r   r  r  s   @r   ra  ra    s~    <$)) !) %	)
 
)$ <A

%
59
	
 
r   ra  c                  J    \ rS rSrSrS
S jrSS jrSS jrSS jrSS jr	Sr
g	)MutatingFirstArgExternKerneli  rK  c                   [        U R                  5      (       d   e/ S U R                   5       Q[        [        U R                  5      QnUR                  U R                  5        SSR                  U5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   )r  rN  s     r   r   7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s     9[!!##[s   r  r  r"  )	r!
  rq  r  rQ  rn
  r  r
  r  rR  )r  r
  argrefss      r   r
  $MutatingFirstArgExternKernel.codegen  s    ,,,,
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                    gr'  r   r  s    r   rZ  ,MutatingFirstArgExternKernel.should_allocate  r*  r   c                &    U R                  S5      /$ r  rW  r  s    r   r  /MutatingFirstArgExternKernel.get_mutation_names  rY  r   c                    [        5       $ r   r=   r  s    r   r  5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  r  r   c                    gr  r   r  s    r   has_side_effects-MutatingFirstArgExternKernel.has_side_effects  r  r   r   Nrn  r  r  r  )r   r   r   r   r  r
  rZ  r  r  r  r   r   r   r   rr  rr    s     
$r   rr  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )ResizeStorageBytesi  c                  > [        U[        5      (       d   S5       e[        TU ]  S [	        UR                  5       S9U R                  U/5      U4S9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        SU l        SU l        [        U[         ["        [$        45      (       d   ['        U5      5       e[        R                  R(                  R+                  UR,                  R                  5       5        g )NzTODO: dynamic shapesr%  )rn
  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r  r  r  r  r	  rs   r  r  rz  r	  r   r	  rq
  rr
  rv  rx  r   r   never_reuse_buffersr  rT  )r  variabler  r  s      r   r  ResizeStorageBytes.__init__  s    (C((@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG(Xz9$EFFVXVF	##''(>(>(@Ar   )rr
  r   rq
  )r  r   r  r   r   r   r  r  s   @r   r  r    s    B Br   r  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )SetSourceTensorKerneli-  c                  > UR                  5         [        TU ]	  UR                  5       X/S[        R
                  R                  R                  R                  S9  [        U[        [        [        45      (       d   [        U5      5       e[        R                  R                   R#                  UR$                  R'                  5       5        [        R                  R                   R#                  UR'                  5       5        [        R                  R                   R#                  U R'                  5       5        UR)                  5       n[+        [-        US9X5      [+        [-        US9X 5      /U l        g )Nz!torch.ops.aten.set_.source_Tensor)rq
  rv
  r%  )rQ  r  r  r  r  rq   r
  set_source_Tensorr   rv  rx  r   r   rs   r  r  r  rT  rz  r  r	  r  r	  )r  self_tensorstorage_tensorr  r  s       r   r  SetSourceTensorKernel.__init__.  s   $$&%%')B		++99	 	 	
 +*i'HII 	
4L
 	
I 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4kH:V4nK!
r   c                F    U R                  S5      U R                  S5      /$ rT  rW  r  s    r   r  2SetSourceTensorKernel.get_inputs_that_alias_outputB  s    "DOOA$677r   r{	  )r  r   r  r   r   r   r  )r   r   r   r   r  r  r   r  r  s   @r   r  r  -  s    
(8 8r   r  c                     ^  \ rS rSrSrSS jrSS jrSS jrSS jrSSS	.               SU 4S
 jjjr	Sr
U =r$ )ScatterFallbackiF  z
This needs to be a custom class to handle mutation properly.
This class handles both aten.scatter_ and aten.scatter_reduce_.
It also handle the case `src` being a scalar properly.
c                &    UR                  U 5        g r   )generate_scatter_fallbackr
  s     r   r
  ScatterFallback.codegenM  s    ))$/r   c                    gr'  r   r  s    r   rZ  ScatterFallback.should_allocateP  r*  r   c                p    U R                   S   n[        U[        5      (       d   eUR                  5       /$ r  r%
  r  rs  s     r   r  "ScatterFallback.get_mutation_namesS  s1    kk!n#v&&&&r   c                    [        5       $ r   r=   r  s    r   r  (ScatterFallback.get_unbacked_symbol_defsX  r  r   NTrE  include_selfc               d  > [        U[        5      U l        U R                  (       a&  X$U4 Vs/ s H  oR                  U5      PM     n	nU4n
O$X$4 Vs/ s H  oR                  U5      PM     n	nX54n
[        TU ]  S [        UR                  5       S9U R                  U	5      U
XgS.[        U5      SS/US9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R!                  U 5        g s  snf s  snf )Nr%  r  rE  r  )rq
  rt
  rv
  )r   r   src_is_tensorrS	  r  r  r  r  r	  r   rs   r  r  rz  r	  r   r	  )r  rv
  r   rH  r   r  rE  r  r  tensorsrn
  r  s              r   r  ScatterFallback.__init__[  s    (Y7 78oFo))!,oGF FM78jAj))!,jGA JMalln-(<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-)r   r  rn  r  rq  r  )rv
  r   r   r   rH  r   r   r   r  r   rE  r  r  r   r   r   r_  r  s   @r   r  r  F  s|    0 
 "!!)!!) !) 	!)
 !) !) !) !) 
!) !)r   r  c                  p   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr            SU 4S jjr	Sr
U =r$ )IndexPutFallbacki  zI
This needs to be a custom class to handle mutation and indices properly
c                &    UR                  U 5        g r   )generate_index_put_fallbackr
  s     r   r
  IndexPutFallback.codegen  s    ++D1r   c                    gr'  r   r  s    r   rZ   IndexPutFallback.should_allocate  r*  r   c                &    U R                  S5      /$ r  rW  r  s    r   r  #IndexPutFallback.get_mutation_names  rY  r   c                    [        5       $ r   r=   r  s    r   r  )IndexPutFallback.get_unbacked_symbol_defs  r  r   c           
       > X0l         U Vs/ s H	  ofc  M  UPM     nnX$/UQ Vs/ s H  o R                  U5      PM     nnSn	[        T
U ]  S [	        WR                  5       S9U R                  U5      U4SU	US9  [        R                  R                  U R                  S5      5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g s  snf s  snf )Naoti_torch_index_put_outr%  zaten.index_put_)rq
  rr
  rv
  r   )r  rS	  r  r  r  r  r	  rs   r  r  r'
  r	  r   r	  )r  rv
  r   r  r   
accumulater   valid_indicesr  rr
  r  s             r   r  IndexPutFallback.__init__  s     $+=GqG=342M}2MN2MQ%%a(2MN4alln-(M0+# 	 	
 	
##DOOA$67GG++D1		""4( >Ns   C-C-C2)r  r   rn  r  r  r  )rv
  torch._ops.OpOverloadr   r   r  r  r   r0	  r  r   r   r   r_  r  s   @r   r  r    s`    2$)*) ) 	)
 ) ) 
) )r   r  c                  2    \ rS rSr\SS j5       rSS jrSrg)
DeviceCopyi  c           
        UR                  5       nUc   eUR                  5       (       d  [        U5      [        R                  R
                  ;  a  [        S UR                  5        5       5      (       a  [        R                  R                  (       dn  [        R                  R                  (       a>  [        R                  R                  U5        [        R                  R                  U5        UR                  U5      $ [        R                  R                  U5        [        R                  R                  U5        [        S5        U4n[        R!                  U5      nS nUR#                  5       (       a  UR%                  5       n['        UR(                  5      =(       a    UR(                  S:H  =(       a    UnUR(                  S:H  =(       a    ['        UR(                  5      =(       a    UnU(       a%  [+        U5      (       a  SUR-                  5       l        [1        [3        UUR5                  5       UR#                  5       UUS9U R7                  U5      /U5      $ )Nc              3  Z   #    U  H!  o[         R                  R                  ;   v   M#     g 7fr   )rs   r  r
  r  s     r   r   $DeviceCopy.create.<locals>.<genexpr>  s     G4Fq***4Fs   )+zDeviceCopy in input programr&  Tr  )r  r  r{  rs   r  r  r   r  rF   aot_inductoruse_runtime_constant_foldingr
  add_device_infor  rb   r  rF  r  r:  rj   r   r  r  rY  r  rW  r  rS	  )	r1  r   r  rd  x_devicern
  r	  is_destination_pinnedis_source_pinneds	            r   r  DeviceCopy.create  s   <<>###Qqww'>'>>GA4D4D4FGGG''DDww"" ''/''1''//	'	)78%++A.::<<\\^F8==!KfkkU&:K| 	 MMU"Kvfkk':K| 	  5a 8 8'+ALLN$

/ q!"

 
	
r   c                   U R                  5       n[        U5      S:X  d   eU R                  (       a2  UR                  US   U R                  R	                  5       US   5        g UR                  US   U R	                  5       US   5        g )Nr   r   rE   )r7  r   rp
  rc  r  )r  r
  r   s      r   r
  DeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   r   N)r   r   r  r  rd  r   r   r   rn  )r   r   r   r   rl  r  r
  r   r   r   r   r  r    s    -
 -
^Tr   r  c                     ^  \ rS rSrSrS
S jrSS jr              SU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )DynamicSelectStorageOffseti  a  
The result of computing a dynamic selection index is determined as follows: when the index in the
select operation is unbacked, the actual index calculation is ambiguous for negative indices
(index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
computing the storage offset at runtime with this node.
c                    [        5       $ r   r=   r  s    r   r  $DynamicSelectStorageOffset.get_reads  r  r   c                    gr'  r   r  s    r   rZ  *DynamicSelectStorageOffset.should_allocate  r*  r   c                   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        X0l        X@l        XPl	        X`l
        g Nr&  r%  )r  r  r  r  r  unbacked_offset_symbolr   base_offsetbase_dim_strider  r  )r  r  r   r  r  r  r  r  s          r   r  #DynamicSelectStorageOffset.__init__  sG     	ze1DErJ '=#
&.	
r   c                .    [        U R                  /5      $ r   )r>   r  r  s    r   r  3DynamicSelectStorageOffset.get_unbacked_symbol_defs	   s    466788r   c                .    [        U R                  U5      $ r   )r'   r   ru  s     r   rv  /DynamicSelectStorageOffset.get_free_symbol_uses   s      

M::r   c                6    UR                  X R                  S9  g )NrQ
  )codegen_dynamic_select_indexr  r
  s     r   r
  "DynamicSelectStorageOffset.codegen   s    ,,T,Dr   )r  r  r  r   r  r  r  r  )r  sympy.Symbolr   r  r  sympy.Symbol | intr  r  r  r  r  r   r   r   r  r  r  rn  r   r   r   r   r  r  rZ  r  r  r^   rv  r
  r   r  r  s   @r   r  r    s     ,  (	
 , !  
&9 89$);!;	!; :;
E Er   r  c                     ^  \ rS rSrSrS
S jrSS jr          SU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )DynamicSliceSizei   a7  
Computes the output size of a slice call, handling the correct semantics in codegen.
We do this for flexible handling for unbacked indices (to not data-dependent error).

Slicing has 4 semantics for indices, i.e. x[start:] could be:
1) start < -x.size(0)            -> x[0:]                    # negative out-of-bounds
2) start in [-x.size(0), 0)      -> x[x.size(0) + start:]    # negative slicing
3) start in [0, x.size(0))       -> x[start:]                # standard slicing
4) start >= x.size(0)            -> empty slice              # positive out-of-bounds

If the appropriate semantics are known beforehand, the output size is computed based on
the start & end indices. If not (with unbacked indices), a new unbacked symbol is created
to represent the output size, and codegen handles computing the correct case.
c                    [        5       $ r   r=   r  s    r   r  DynamicSliceSize.get_reads&   r  r   c                    gr'  r   r  s    r   rZ   DynamicSliceSize.should_allocate)   r*  r   c                   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        X0l        X@l        XPl	        g r  )
r  r  r  r  r  unbacked_size_symbolr  r  r  r  )r  r  r  r  r  r  r  s         r   r  DynamicSliceSize.__init__,   s>     	ze1DErJ$8!
		r   c                .    [        U R                  /5      $ r   )r>   r  r  s    r   r  )DynamicSliceSize.get_unbacked_symbol_defs<   s    444566r   c                t    [        U R                  U5      R                  [        U R                  U5      5      $ r   )r'   r  r  r  ru  s     r   rv  %DynamicSliceSize.get_free_symbol_uses?   s0      

M:@@TXX}5
 	
r   c                &    UR                  U 5        g r   )codegen_dynamic_slice_sizer
  s     r   r
  DynamicSliceSize.codegenG   rw  r   )r  r  r  r  r  r  r  )
r  r  r  r  r  r  r  r  r  r  r  r  r  rn  r  r  s   @r   r  r     s    * "  	
 ! ! 7 ./$)
!
	!
 0
1 1r   r  c                  h   ^  \ rS rSrSrS	S jrS
S jr        SU 4S jjrSS jrSS jr	Sr
U =r$ )r   iK   z3
The result of a call to aten._local_scalar_dense.
c                    [        5       $ r   r=   r  s    r   r  DynamicScalar.get_readsP   r  r   c                    gr'  r   r  s    r   rZ  DynamicScalar.should_allocateS   r*  r   c                   > UR                  5         [        TU ]	  S [        [        R
                  " S5      S9U R                  U/5      5        Xl        X l        g r  )	r  r  r  r  r  r  r	  symkeypath)r  r  r  rT  r  s       r   r  DynamicScalar.__init__V   sI     	*ELL$78$:M:Mtf:U	
 r   c                .    [        U R                  /5      $ r   )r>   r  r  s    r   r  &DynamicScalar.get_unbacked_symbol_defs`   s    488*%%r   c                &    UR                  U 5        g r   )codegen_dynamic_scalarr
  s     r   r
  DynamicScalar.codegenc   s    &&t,r   )r  r  r  r  )r  r  r  zpytree.KeyPathrT  r   r   r   r  rn  )r   r   r   r   r  r  rZ  r  r  r
  r   r  r  s   @r   r   r   K   sF    *8@F	&- -r   r   c                     ^  \ rS rSrSrS
S jrSS jrSU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )r   ig   z-
The result of a call to aten._assert_scalar
c                    [        5       $ r   r=   r  s    r   r  AssertScalar.get_readsl   r  r   c                    gr'  r   r  s    r   rZ  AssertScalar.should_allocateo   r*  r   c                v   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        g r  )r  r  r  r  r  scalarr
  )r  r  r
  r  s      r   r  AssertScalar.__init__r   s3    ell512	
 r   c                    gr  r   r  s    r   r  AssertScalar.has_side_effects}   r  r   c                .    [        U R                  U5      $ r   )r'   r  ru  s     r   rv  !AssertScalar.get_free_symbol_uses   s      ];;r   c           	        [         R                  (       d  g [        [        U R	                  SS95      5      n[
        R                  R                  (       a  g [
        R                  R                  (       a^  SU S3n[
        R                  R                  R                  U R                  SS9nUR                  SU SU R                   SU S	35        g [
        R                  R                  R                  U R                  SS9nUR                  S
U S35        UR                  S[        U R                  5       S35        UR                  U R!                  5        S35        g )NFro  zstd::to_string(r"  )rs  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)rF   scalar_assertsrl  rm  rv  rs   r  r
  r
  r  codegen_cpp_sizevarr  r  r
  codegen_python_sizevarrQ  rz  )r  r
  rV  
symbol_strsizevars        r   r
  AssertScalar.codegen   s4   $$ d44454IJK77WW  *6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:r   )r
  r  r  r  )r  rt   r
  r   r   r   r  r  rn  )r   r   r   r   r  r  rZ  r  r  r^   rv  r
  r   r  r  s   @r   r   r   g   sR    	 N+$)<!<	!< ,<
; ;r   r   c                  *    \ rS rSr% S\S'   S\S'   Srg)ExternKernelNodei   r   r   zexport_schema.Noder   r   Nr   r   r   r   r
  r
     s    
I
r   r
  c                  
  ^  \ rS rSrSr SSS.               SU 4S jjjjrSU 4S jjrSS jrSS jrSS	 jr	\
      SS
 j5       rSS jrSS jrSS jrS r\SS j5       r\
SS j5       r\SS j5       rSrU =r$ )r]	  i   z
A class that represents a fallback kernel for handling operators that are not
directly support by inductor. It currently supports functional ops, view ops,
inplace aten ops, and mutating ops that are auto-functionalizable.
Nrz
  c                 >^  [         TT ]  U[        U5      [        U5      US9  ST l        U=(       d    0 T l        [        U[        R                  R                  [        R                  R                  45      (       d   SU S[        U5       S35       eUT l        UT l        Uc  0 OUT l        T R                  c   e[        R                   R#                  T R                  5        / T l        / T l        [        T R                  [        R                  R                  5      (       a  g ST R                  R)                  5       ;   a  g T R                  R*                  n[        R,                  R.                  R1                  T R                  5      (       a-  T R&                  R3                  US   R5                  5       5        g SS jn	UR6                  (       aI  [9        T R                  5      (       d/  U	" T R                  5      (       d  [;        S	T R                   35      eT R                  T R<                  T R>                  5      u  pSU 4S
 jjn[        R,                  R.                  RA                  XU5       H  u  pU" X5        M     g )Nr]  F#Fails to create FallbackKernel for r!   not supported_c10d_functionalr   c                8   [         R                  R                  U R                  5       [         R                  R                  R
                  5      =(       dD    [        U S5      =(       a1    [         R                  R                  R
                  U R                  ;   $ )N
py_kernels)r  _C%_dispatch_has_kernel_for_dispatch_keyr   DispatchKeyFunctionalizer  r  rd  s    r   has_functionalize_impl7FallbackKernel.__init__.<locals>.has_functionalize_impl   sg    88AA	588//==  L) HHH((66"--G	r   z'NYI: Can't generate FallbackKernel for c                >  >^  [        T R                  [        R                  5      (       a+  [        U[        [
        45      (       d   [        U5      5       e[        R                  " T R                  5      (       a  [        U[
        [        45      (       a   eUc  g T R                  c  g SU U4S jjn[        R                  " T R                  5      (       a  Ub  U H  nU" U5        M     g g [        R                  " T R                  5      (       d   eU" U5        g )Nc                $  > TR                   R                  U R                  5       5        TR                  c   eTR                  R                  (       a<  TR
                  R                  [        [        U R                  5       S9U T5      5        g g r~  )	alias_namesr  rz  
alias_infois_writer	  r	  r  r  )r  infor  s    r   	add_aliasPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias!  sj      ''

5222??++))00&z'H!TR ,r   )r  r   r   r   )
r   r   r  ListTyper   r   library_utilsis_tensor_like_typer  is_tensorlist_like_type)r  rN
  r  optional_tensor_argr  s   `   r   handle_aliasing_and_mutation=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation!  s    $))U^^44!#e}55@tCy@500;; &cE4=9999{&  44TYY???/2+!"56 03 # %88CCCC#r   )re  r  r   r   )r  ztorch._C.ArgumentrN
  r   r   r   )!r  r  r   use_runtime_dispatchrz
  r   r  r
  r
  r
  r   rv
  r
  r   rq
  rs   r  warn_fallbackr  r  r   r
  _libraryr  mutates_and_returns_first_argr  rz  
is_mutabler$   r  rq  rn
  
zip_schema)r  rU  r
  r
  nontensor_argsr
  r   rz
  schemar  r   r&  r  rN
  r  s   `             r   r  FallbackKernel.__init__   s*    	+.!	 	 	
 %*!!2!8bUZZ**EJJ,J,JK
 
 	X04<.W	X 
 ","Nb&&222	d556 '))+d&&

(F(FGG !1!1!6!6!88
 !!)) >>==d>N>NOO&&{1~'>'>'@A	 *4+;+;<<*4+;+;<<%9$:J:J9KL  **4;;8J8JK	> --88vNID(3 Or   c                @  > [         TU ]  5       nU R                  [        R                  R
                  R                  L a]  U R                   HM  n[        U[        5      (       d  M  UR                  [        R                  " UR                  5       5      5      nMO     U$ r   )r  rg  rv
  r  _prims	rng_primsgraphsafe_run_with_rng_statern
  r   r
  	with_readrG   r  rz  )r  r  rN
  r  s      r   rg  FallbackKernel.get_read_writes*!  sw    g-/u||55RRR))c>22"-"7"7$,,S\\^<#K * r   c           	     n    UR                  U R                  5       U R                  [        U SS 5      5      $ Nrz
  )(codegen_unbacked_symbol_defs_for_outputsrz  r	  r   r
  s     r   codegen_unbacked_symbol_defs+FallbackKernel.codegen_unbacked_symbol_defs6!  s0    ??MMOT\\749Ld+S
 	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r8  r   r9   rs   r  r  r   r>   r+  r  rz
  resolveds      r   r  'FallbackKernel.get_unbacked_symbol_defs;!  _     '.A4 HHH0  **,=H '''hmmo..<r   c                ~   [         R                   " S S5      5       n[        U R                  5      (       d   eU R                   Vs/ s H  o!" UR	                  5       5      PM     nnU R                  X0R                  5      u  pE[        R                  R                  (       a  [        U R                  [        R                  R                  5      (       a  U R                  XE5      n[!        U R                  R"                  R$                  U5       VVs/ s H8  u  pb[        R                  R&                  R)                  X&R*                  5      PM:     nnnO9U Vs/ s H,  n[        R                  R&                  R)                  U5      PM.     nnU R,                  R/                  U5        U$ s  snf s  snnf s  snf )Nc                  *    \ rS rSr% S\S'   SS jrSrg))FallbackKernel.codegen_args.<locals>.ShimiF!  r   refc                    U R                   $ r   )rE  r  s    r   rk  2FallbackKernel.codegen_args.<locals>.Shim.__repr__J!  s    xxr   r   Nr  )r   r   r   r   r   rk  r   r   r   r   ShimrD  F!  s    H r   rH  )r  	dataclassr!
  rq  r  r
  rn
  rs   r  r
  r   rv
  r  r
  r
  r,  r   r
  r
  r  r/  r
  r   rw	  )r  rH  r   r
  r   r   params          r   r7  FallbackKernel.codegen_argsE!  s\   				  	  
	   ,,,,<@KKHKqtA//12KH**;8J8JK77:d.>.>

@U@U#V#V..t<D !$D$4$4$<$<$F$F M MHE $$33AG M  D
 EIIDqAGG((77:DDI 	6" I
 Js   F/?F43F:c                ~   U (       a*  U  Vs/ s H  n[        U[        5      (       a  M  UPM     snOS nU(       aD  U (       d   eU  Vs/ s H)  oDR                  5       (       d  M  UR                  5       PM+     nnUS   $ [        U[        R                  5      (       a  UR
                  $ [        U[        R                  R                  [        45      (       d  [        U5      (       a  [        R
                  " S5      $ [        U[        [        45      (       a  [        S U 5       5      nU Vs/ s H  ow(       d  M  UPM     nn[        U5      S:X  a  US   $ U(       d  g U HB  n[        U[        R
                  5      (       d   e[        UR                  5      (       d  M@  Us  $    US   $ g s  snf s  snf s  snf )Nr   r&  c              3  N   #    U  H  n[         R                  S U5      v   M     g 7fr   )r]	  find_devicer  s     r   r   -FallbackKernel.find_device.<locals>.<genexpr>q!  s)      $ (A **433'r  rE   )r   r3
  r  r  r[	  r  r  ScriptObjectr(   r*   r   r   r>   r   rj   r   )r
  r
  r  non_torch_bind_tensor_argsrN
  devices
device_setr  s           r   rN  FallbackKernel.find_device]!  su     $J1:a+IQJ 	#
 &;3>S;C..BR's~~';GS1:nell33!(((UXX224DE
 
^,,<<&&ntUm44# $ ($ J -7AJ&&vJGA7|q qz!!!&%,,7777&++&&!M " 1:? K T Bs"   F0F0F5$F50
F:>F:c                2    SSK Jn  U" U R                  5      $ )Nr   )	is_impure)torch._library.utilsrV  rv
  )r  rV  s     r   r  FallbackKernel.has_side_effects!  s    2 ))**r   c                .   [        U R                  [        R                  R                  [        R                  R
                  45      (       d+   SU R                   S[        U R                  5       S35       e[        U R                  [        R                  R
                  5      (       d_  SU R                  R                  5       ;  aA  U R                  R                  R                  (       a  [        U R                  5      (       a  / $ U R                  $ )Nr  r!  r  r  )r   rv
  r  r
  r
  r
  r   r   r
  r,  r$   r  r  s    r   r  +FallbackKernel.get_inputs_that_alias_output!  s    uzz44ejj6T6TU
 
 	
 2$2B2B1C2D$$%&n6	
 
 4++UZZ-K-KLL"$*:*:*?*?*AA  ((33&t'7'788I###r   c                P    [        U R                  5      S::  d   eU R                  $ r6  )r   r  r  s    r   r  !FallbackKernel.get_mutation_names!  s'    4&&'1,,,"""r   c           
         [         R                  SU R                  5       U R                  5        [	        U [
        5      (       d   [        U 5      5       eU R                  U R                  U R                  5      u  pU R                  X5      nU R                   Vs/ s H  nU R                  " U40 UD6PM     nnU R                  n[        R                  R                  (       d  / UQUQ$ [!        S/ 5      nUR#                  XQU5      n      SS jn[	        U[$        R&                  R(                  R*                  5      (       a#  UR-                  US   US   5      R.                  n	OUR0                  R.                  n	[3        U	5      S:X  aB  U R4                  (       a  U R4                  OU R6                  n
U	S   R8                  nU" X5      /nO:[;        XR4                  5       VVs/ s H  u  pU" UR8                  U5      PM     nnnU R                  c   e[=        U R                  5       [>        R@                  " U R                  RC                  5       UU0 S9S9n[        RD                  RG                  U5        / UQUQ$ s  snf s  snnf )	a  
ProxyExecutor Design Note
We export the ExternFallbackNodes (for custom ops) into a serialized file
and run it with a host side proxy executor to address the ABI problem
This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
Detailed design doc can be found at
https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
z4Extern kernel node added for node %s with target %s.Nc           	     
   [        U [        R                  [        R                  45      (       a  Un[        U[        [
        45      (       a  [        U5      S:X  d   eUS   n[        U [        R                  5      (       aT  [        U[        5      (       d   e[        R                  R                  [        R                  " UR                  5       S9S9$ Ub   e[        R                  R                  SS9$ [        U [        R                  5      (       a  [        U R                  5       [        R                  5      (       as  [        U[        5      (       d   [!        U5      5       e[        R                  R                  U Vs/ s H%  n[        R                  " UR                  5       S9PM'     snS9$ [        U [        R"                  5      (       a  [        U R                  5       [        R                  5      (       a  Uc8  [        R                  R                  [        R$                  R                  SS9S9$ [        U[        5      (       d   e[        R                  R                  [        R$                  R                  [        R                  " UR                  5       S9S9S9$ [        U [        R&                  5      (       a  [        R                  R                  US	9$ [)        S
[!        U 5       35      es  snf )NrE   r   r  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r  
TensorTypeNoneTyper   r   r   r   export_schemaru   r  TensorArgumentrz  r!  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)return_typerc  r   s      r   handle_single_outputFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output!  s    +(8(8%..'IJJftUm44v;!+++ )Ck5+;+;<<%c62222(1188"/">">CLLN"S 9   ;&;(11888FFK88Z**,e.>.>> > "&(33AT&\A3$--44 $* #)C &44#,,.I#)  5   K););<<**,e.>.>B B >(1188+8+O+O+V+V$( ,W , 9   &ff5555(1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K77$--44F4CC"%=d;>O=P#QRR7 s   ,L r   rE   )r	  rq  r	  r	
  )r   r   )rm  z1torch.TensorType | torch.ListType | torch.JitTyperc  IRNode | Sequence[IRNode]r   zexport_schema.Argument)$r'  r  rz  rv
  r   r]	  r   r
  rq  rn
  r,  rt
  r:  rs   r  aot_moder#   serialize_inputsr  r
  	torchbindCallTorchBindr/  returnsr
  r   r	  r	  r
  r   r
  rf  r<   r   extern_kernel_nodesr  )r  r   r   r  ordered_kwargsr	  
serializernamed_argumentsrn  ru  r	  rm  output_argumentsreturn_schemarc  r   s                   r   export_extern_kernel_node(FallbackKernel.export_extern_kernel_node!  sl    			BMMO	
 $//;d;/**4;;8J8JK**48 99
9 !!#009 	 
 !!ww+T+N++*44
$55fFK3	SJ3	S-3	S $3	Sj fe55??MMNNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[ JK .1,,-G 
 .H)M	 %!++ .H    +++##'',,.&(	
 	
$$T*''''K
` s   J*J
c                  ^ ^^ T R                   nUc   eUR                  S:X  a  [        U[        R                  R
                  5      (       d   [        U5      5       e[        R                  R                  (       a2  SSK
Jn  [        U5      U;  a  [        R                  SU5        ST l        OUR                  S:X  a:  [        U[        R                  R
                  5      (       d   [        U5      5       eOA[        R                  R                  (       a"  U[         R"                  R$                  ;  T l        [        R                  R                  (       a  [        U[        R                  R
                  5      (       a  T R                  (       d  SU4S jjmT R'                  T R(                  T R*                  5      u  nm[,        R.                  " UUU 4S	 jT R0                   5       5      n[3        U4S
 j[5        XRR6                  R8                  5       5       5      T l        T R;                  U5        T R                  (       a  T R=                  5       nT R>                  c   eT R                   c   eURA                  T RC                  5       T R>                  U 4S jT R                   UT RD                  (       a  T RD                  OT RF                  5        OcURI                  T 5        [        T RJ                  [L        5      (       a3  T RO                  U5        T RQ                  U5        T RS                  U5        T RU                  U5        g)r  Nr
  r   r
  zG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                   > [        U [        R                  5      (       a  T" U R                  5       5      $ [        U [        R                  5      $ r   )r   r  ri  rh  
NumberType)r  	is_numbers    r   r  )FallbackKernel.codegen.<locals>.is_numberC"  s=    a!3!344$Q%5%5%788!!U%5%566r   c              3  J   >#    U  H  nTR                   " U40 TD6v   M     g 7fr   )r:  )r   r>  r   r  s     r   r   )FallbackKernel.codegen.<locals>.<genexpr>P"  s(      ? ))!6v6?s    #c              3  z   >#    U  H0  u  p[        U[        5      =(       a    T" UR                  5      v   M2     g 7fr   )r   complexr
  )r   r  r  r  s      r   r   r  U"  s2      ,DDA 1g&A9Q[[+AADs   8;c                 H   > / T R                  5       QT R                  5       Q$ r   )r7  r?  r  s   r   r  (FallbackKernel.codegen.<locals>.<lambda>c"  s"    F$++-F0C0C0EFr   )r  ztorch.JitTyper   r   )+rv
  r
  r   r  r
  r
  r   rs   r  r
  r
  r
  r   r'  r  r(  rF   r  custom_ops_to_c_shimsr
  rq  rn
  rF  rd  rt
  r  r   r
  r
  r
  r|  rq
  ,generate_fallback_kernel_with_runtime_lookuprz  r	  r	  generate_fallback_kernelrU  r  rL  rQ  rW  r:  )	r  r
  r
  r
  r   	args_iterexported_argsr  r   s	   `      @@r   r
  FallbackKernel.codegen"  sz    !!!!!v%fejj&;&;<<Jd6lJ<ww""Lv;&;; KKa 15D--fejj&;&;<<Jd6lJ<WW   f11GGG % GG65::#8#899--7  ..t{{D<N<NOLD& "!??I ), ,	>>+C+CD, )D%
 	W%$$ ::<M**666##///@@''F   $$2G2G ,,T2$++v..))'2..w7,,W5))'2r   c           	         Sn U R                  5       n[        U R                  U R                  [        U R                  5       5      [        U R                  5       5      US9$ ! [         a     N[f = f)NFr  )rY  rl  rW  r  r
  r`   r  r	  )rc  rY  s     r   r^	  FallbackKernel.tensor_to_layoutr"  sj    		((*I MMLL%fkkm4%fmmo6
 	
  		s   A# #
A0/A0c                h  ^ ^^^^ [         R                  4nX;  a,  [        [        S   [        R
                  R                  5      nO
[        5       nU   T R                  " U/UQ70 UD6u  nnnn	n
SSS5        [        U[        R                  R                  5      (       a  [        W[        R                  5      (       a  SSKJnJnJnJn  SnU" UR&                  5      (       a  U" U5      nUc  U" U5      nUbz  [)        U" U5      5      S:X  ae  [+        UR,                  UR.                  / UR0                  Q/ UR3                  5       QS9n[5        U[7        W5      [7        W5      U[9        U5      US9$ [;        S W 5       5      mT R=                  UW5      mT(       dp  [        U[        R>                  R@                  RB                  5      (       d'  U[        RD                  RF                  RH                  L a  [        R,                  " S5      m[        U[        R                  R                  5      (       aF  [        R
                  RJ                  (       d'  T(       a   [L        RO                  UUTUWW	UW
TS	9	nUb  U$ Uc  T " [Q        TS
9UUWW	UW
S9mO!T(       d   S5       eT " [S        TS
9UUWW	UW
S9mSU UUUU4S jjmT" U/ 5      n[        U[6        [T        45      (       a	  UTl+        U$ [        U[X        5      (       a  [U        U5      Tl+        U$ U/Tl+        U$ ! , (       d  f       GN= f)z9Create an instance of FallbackKernel from an _OpOverloadsNr   )_is_functionalget_out_arg_nameslookup_manual_out_variantto_out_variantrE   r  r
  r  r	  )rU  rq  rn
  r   rq
  rv
  c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r  rM
  s     r   r   (FallbackKernel.create.<locals>.<genexpr>"  s     !K{,s"3"3{r  r&  rz
  has_unaligned_inputr%  r   rz
  z"Not sure where to find device infoc                  >^ ^ [        T [        [        45      (       a/  [        T 5      " UUU 4S j[	        [        T 5      5       5       5      $ [        T [        5      (       a<  T R                  5        VVs0 s H  u  p#UT" UT[        T 5      U4/-   5      _M      snn$ [        T [        R                  5      (       a}  [        TR                  T 5      T	T5      n[        R                  (       d  T(       d  [        T 5      (       d3  [        R                   R"                  R%                  UR&                  5        U$ [        T [(        5      (       a  T $ [        T [        R*                  5      (       a  T R,                  R.                  $ [        T [        R0                  R2                  [4        45      (       d  [7        T 5      (       a  [9        [;        TS9T	TT 5      $ T b   S[        T 5       S35       eg s  snnf )Nc              3  Z   >#    U  H   nT" TU   T[        T5      U4/-   5      v   M"     g 7fr   )r   )r   r   generate_outputr  rc  s     r   r   AFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>"  s7      $/ $F1Iw4<:K9L/LMM/s   (+r%  zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   r  r  r[	  rR	  r^	  rF    assume_unaligned_fallback_outputrp   rs   r  r  r  r   r   r  r   rT  r  rP  r(   r*   r   r  )
rc  r  r  rY  r  r1  r  r  r  packeds
   ``   r   r  .FallbackKernel.create.<locals>.generate_output"  s   &4-00F| $"3v;/$   FD)) %+LLN$2 g$v,9L8M.MNN$2  FELL11!((0 ;;*,V44GG--11#((;
FC((FELL11{{'''..0@A  (((f-	  ~ 1$v,?PQ~ Es   6%G3)rc  r   r  zlist[tuple[Any, int]]r   r   )-r
  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rs   r  r9  r
   r
  r   r  r
  r
  r[	  torch._library._out_variantr  r  r  r  r
  r   rW  r  r
  rG  r	  rs  r   _make_out_variant_kernel_namer  rN  r
  rs  rt  rq   higher_orderprintr
  ExternKernelMultiOut
try_creater  r$	  r   r	  r   )r1  r
  r   r   fake_incorrect_kernelscontextr
  r
  r
  r
  rz
  r  r  r  r  out_oprU  
out_resultr	  r  r  r  r  s   `                  @@@@r   r  FallbackKernel.create"  s    #'"Q"Q!S/1$79J9JKG!mG ""6;D;F;!  fejj3344ELL:
 :
  Ffnn--'/~26:!c*;F*C&D&I$)00(..0>//05^2245	 '!,"&"7!'DV'L &  "!K{!KKn= vu66@@NNOO//555\\%(F vuzz4455GG''-88"3$7 9 
J %!!!&)"3F ???6!0"3F)	 )	V "."5ge}--$FN  &&"7^FN  &YFNk Ws   L""
L1)r  r   r  rv
  rz
  r
  r(  r   rU  r#  r
  r   r
  rm	  r.  r0	  r
  r	  r   rm  rz
  )dict[sympy.Symbol, pytree.KeyPath] | Noner   r   r  rn  r  rq  )r
  zSequence[torch.Tensor] | Noner
  r0	  r   r   r  r  )rc  r  r   rW  )r
  r   r   r   r   r   r   r]	  )r   r   r   r   r  r  rg  r:  r  r7  r  rN  r  r  r  r|  r   r
  r^	  rl  r  r   r  r  s   @r   r]	  r]	     s    )-u4 HLu4u4 u4 &	u4
 &u4 +u4 &u4 Eu4 
u4 u4n


 0 #2#DQ#	# #J+$*#w(r S3 S3j 
 
 ] ]r   r]	  c                  l   ^  \ rS rSrSrS	S jrS
S jrSSS.               SU 4S jjjrSrU =r	$ )ComplexViewi##  z9View a complex number as two dtyped numbers or vice versac                    gr'  r   r  s    r   rZ  ComplexView.should_allocate'#  r*  r   c                &    U R                  S5      /$ r  rW  r  s    r   r  (ComplexView.get_inputs_that_alias_output*#  s    "##r   Nr  c          
     ,   > [         TU ]  UUUUUUUS9  g )Nr  )r  r  	r  rU  r
  r
  r.  r
  r   rz
  r  s	           r   r  ComplexView.__init__.#  s,     	/ 	 	
r   r   r  r  r  )
r   r   r   r   r  rZ  r  r  r   r  r  s   @r   r  r  ##  sq    C$ )-GK

 
 &	

 &
 +
 &
 E
 

 
r   r  c                  "    \ rS rSrSrSS jrSrg)MemoryCheckKerneliD#  z
Custom kernel for memory checking that generates direct function calls

TODO - the custom op was erroring with str inputs. should be able to custom op directly.
c                    UR                  5         U R                  u  p#n[        U5      n[        U5      nU(       a  UR                  S5        SU SU SU S3nO	SU SU S3nUR                  U5        g)z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r"  N)rV  rn
  rQ  r  )r  r
  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   r
  MemoryCheckKernel.codegenK#  s     	224/3/A/A,
}*%
O	h 2*Xi[P`an`oopqD1*Xi[PQRD$r   r   Nrn  )r   r   r   r   r  r
  r   r   r   r   r  r  D#  s     r   r  c                  *    \ rS rSr% S\S'   SS jrSrg)r$	  i]#  r  r  c                    U R                   $ r   r%  r  s    r   r  MultiOutputLayout.get_devicea#  r'  r   r   Nr  )r   r   r   r   r   r  r   r   r   r   r$	  r$	  ]#  s    r   r$	  c                     ^  \ rS rSrS	S jr S
         SU 4S jjjr\" S 5       S
   SS jj5       rSS jrSS jr	SS jr
SrU =r$ )rR	  ie#  c                    UR                  U 5        U R                  (       d#  U R                  U5        U R                  U5        g g r   )codegen_multi_output!skip_size_stride_alignment_checksrL  rQ  r
  s     r   r
  MultiOutput.codegenf#  s:    $$T*55%%g.**73 6r   c                   > [         TU ]  S X/S5        [        R                  R	                  U 5      U l        [        R                  R                  U 5        X0l        X@l        g r  )	r  r  rs   r  r	  r   r	  r  r  )r  rU  r&
  r  r  r  s        r   r  MultiOutput.__init__l#  sK     	vw3GG++D1		""4(1R.r   c                z    U R                   S   n[        U[        5      (       d   U5       eUR                  U5      $ r  )rq  r   r   rv  )r  rp  r/  s      r   rv   MultiOutput.get_free_symbol_usesy#  s:     [[^
*f--9z9-..}==r   c                z    [        U R                  5      S:H  =(       a    [        U R                  S   [        5      $ )NrE   r   )r   rq  r   r	  r  s    r   rZ  MultiOutput.should_allocate#  s0    4;;1$ 
t{{1~'89	
r   c                    U R                    Vs/ s HI  n[        U[        5      (       d  M  [        UR	                  5       5      S:  d  M9  UR                  5       PMK     sn$ s  snf r  )rq  r   r]	  r   r  rz  r  s     r   r  (MultiOutput.get_inputs_that_alias_output#  s\     {{
"#~.  C4467!; CLLN"
 	
 
s   A"A"A"c                |  ^^ [        5       nU R                   HM  n[        U[        5      (       d  M  UR	                  [
        R                  " UR                  5       5      5        MO     U R                  5       mU R                  5       R                  5       mSUU4S jjnU R                  5       n[        R                  (       + =(       d%    US L =(       d    [        UR                  5      (       + n[
        R                  " X0R!                  5       SUS9n[
        R"                  " UUR$                  [        5       S9$ )Nc                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ r7	  r9	  r:	  s     r   r-	  *MultiOutput.get_read_writes.<locals>.dummy#  r<	  r   r   r'	  r  r/	  )r>   rq  r   r   r  rG   r  rz  r  r1  r  rF   r  rj   r   rP   r  r  r  )	r  rk  rs  r-	  r  should_normalizewrite_rwr  r   s	          @@r   rg  MultiOutput.get_read_writes#  s     /9l;;C#v&&		,..s||~>?  }}//#002	; 	; "111 '~'&++&& 	
  33==?B2B
 &&??"
 	
r   )r  r   r  rn  r  )
rU  r#  r&
  r   r  list[tuple[Any, ...]]r  r   r   r   r  r  r  r  )r   r   r   r   r
  r  r^   rv  rZ  r  rg  r   r  r  s   @r   rR	  rR	  e#  s    4 38SS S '	S
 ,0S 
S S M*$)>!>	!> +>


 
  
r   rR	  c                  l   ^  \ rS rSrSr          SU 4S jjr\S	S j5       rS
S jrSS jr	Sr
U =r$ )r   i#  zMultiOutput for opaque objects.c                0   > [         TU ]  XUSS9  X@l        g )NTr  )r  r  r
  )r  rU  r&
  r  opaque_valuer  s        r   r  OpaqueMultiOutput.__init__#  s      	SWX$0!r   c                    [        S5      e)NzOpaqueMultiOutput has no dtype)r  r  s    r   r
  OpaqueMultiOutput.dtype#  s    =>>r   c                    U $ r   r   r  s    r   r  #OpaqueMultiOutput.wrap_for_lowering#  r  r   c                h   [        5       nU R                   HM  n[        U[        5      (       d  M  UR	                  [
        R                  " UR                  5       5      5        MO     [        [
        R                  " U R                  5       5      /5      n[
        R                  " UU[        5       S9$ r=  )	r>   rq  r   r   r  rG   r  rz  r  )r  rk  rs  r  s       r   rg  !OpaqueMultiOutput.get_read_writes#  s    .8l;;C#v&&		,..s||~>?  0:!!$--/230
 &&"
 	
r   )r
  )
rU  r#  r&
  r   r  r  r  r   r   r   )r   r   )r   r   r  )r   r   r   r   r  r  r  r
  r  rg  r   r  r  s   @r   r   r   #  s^    )11 1 '	1
 1 
1 ? ?
 
r   r   c                  ,    \ rS rSrSrSS jrSS jrSrg)	AllocatingMultiOutputi#  zMultiOutput with Inductor-controlled allocation for .out() variant ops.

Overrides should_allocate()=True so Inductor allocates the output buffer,
and skips tuple-indexing codegen since .out() writes directly into these buffers.
c                    gr  r   r  s    r   rZ  %AllocatingMultiOutput.should_allocate#  r  r   c                l    U R                   (       d#  U R                  U5        U R                  U5        g g r   )r  rL  rQ  r
  s     r   r
  AllocatingMultiOutput.codegen#  s,    55%%g.**73 6r   r   Nr  rn  )r   r   r   r   r  rZ  r
  r   r   r   r   r  r  #  s    4r   r  c                    U R                   nU R                  R                  R                  S5      S   nU R                  nSU SU SU 3$ )z8Build fully-qualified kernel name for an out-variant op.z::rE   z
torch.ops.r
  )r
  r
  r   r  r
  )r  nsrF  r   s       r   r  r  #  sP    			Bnn!!''-a0G##Ht1WIQxj11r   c                     ^  \ rS rSr% SrS\S'   S\S'             SU 4S jjrSS jr\S	S
S.                   SS jj5       r	Sr
U =r$ )r  i#  zMulti-output .out() variant lowering.

Subclass of FallbackKernel that emits .out() calls with pre-allocated
output buffers. Uses AllocatingMultiOutput child nodes for each output.
r   out_arg_nameszlist[AllocatingMultiOutput]out_variant_output_nodesc               l   > [         TU ]  " U0 UD6  X l        / U l        [	        U5      U l        Xl        g r   )r  r  r  r  r  rq
  rv
  )r  r  r  r   r   r  s        r   r  ExternKernelMultiOut.__init__#  s8     	$)&)*(*%"?"G!r   c                H    U R                  U5        UR                  U 5        g r   )r
   generate_extern_kernel_multi_outr
  s     r   r
  ExternKernelMultiOut.codegen$  s    W%006r   NFr  c               X   SSK Jn
JnJn  U
" UR                  5      (       d  g[        U[        [        45      (       d  gU" U5      nUc  gU" U5      n[        S U 5       5      (       d  g[        U5      [        U5      :w  a  gU " [        US9UUUUUUUUS9	n/ n[        U5       H  u  nn[        UR                  UR                  / UR                  Q/ UR!                  5       QS9n[#        UU[%        U5      U4/S9n[&        R(                  (       d  U	(       d  [+        U5      (       d3  [,        R.                  R0                  R3                  UR4                  5        UR7                  U5        M     UUl        UUl        [        U[        5      (       a  [        U5      $ [        U5      $ )	zGCreate an ExternKernelMultiOut if the op has a matching .out() variant.r   )r  r  r  Nc              3  V   #    U  H  n[        U[        R                  5      v   M!     g 7fr   )r   r  r[	  rN  s     r   r   2ExternKernelMultiOut.try_create.<locals>.<genexpr>'$  s     G1:a..s   ')r%  )r   rz
  r  r  r  )rU  r&
  r  )r  r  r  r  r
  r   r   r   r   r   r$	  r   rW  r  r
  rG  r	  r  r   rF   r  rp   rs   r  r  r  r   r  r  r	  )r1  r
  r
  r  r
  r
  r
  r   rz
  r  r  r  r  r  r  r  r	  r   
tensor_outrU  	multi_outs                        r   r  ExternKernelMultiOut.try_create$  s   	
 	
 fnn--.5$-88'>)&1GGGG~#m"44V,/'

 02&~6MAz !(( &&(z''(-**,-	F .~.23I 77&(44))--inn=NN9%% 7( +2' ne,,>!G}r   )rv
  r  r  rq
  )
r   r   r  r  r  r   r   r   r   r   rn  )r
  r  r
  r   r  r  r
  rm	  r
  r0	  r
  r	  r   rm  rz
  r  r  r   r   z&Sequence[AllocatingMultiOutput] | None)r   r   r   r   r  r   r  r
  rl  r  r   r  r  s   @r   r  r  #  s     99"" &" !	"
 " 
"7  HL$)J%J J 	J
 &J 'J +J &J EJ "J 
0J Jr   r  c                     \ rS rSr% SrS\S'   S.S jrS/S jrS0S jrS1S jr	S2S	 jr
S3S
 jrS4S5S jjrS6S jrS7S jrS8S jrS.S jrS7S jr S9     S:S jjrS;S jrS<S jr S9     S=S jjrS>S jrS?S jrS@S jrSAS jrSBS jrSCS jrS.S jrS.S jrSDS jrSES jrS3S  jr SES! jr!SBS" jr"\#" S 5       S9   SFS# jj5       r$SGS$ jr%SHS% jr&S4SIS& jjr'\(SJS' j5       r)SKS( jr*SJS) jr+SCS* jr,\(SLS+ j5       r-S3S, jr.\.r/S-r0g)Mr  iW$  z;
TensorBox / StorageBox allow in-place mutation of Tensors
r   rT  c                6    U R                   R                  5       $ r   r  r  s    r   r(  !MutableBox.has_exceeded_max_reads_$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_deviceb$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r-  MutableBox.make_loadere$      yy$$&&r   c                6    U R                   R                  5       $ r   )rT  r1  r  s    r   r1  MutableBox.make_indexerh$      yy%%''r   c                6    U R                   R                  5       $ r   )rT  r:  r  s    r   r:  MutableBox.get_stridek$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rz  MutableBox.get_namen$  r  r   Nc                8    U R                   R                  U5      $ r   )rT  rC  rA  s     r   rC  MutableBox.has_large_inner_fnq$  s    yy++I66r   c                8    U R                   R                  U5      $ r   r  rF  s     r   rH  MutableBox.mark_reuset$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rL  MutableBox.realize_hintw$  r  r   c                6    U R                   R                  5       $ r   )rT  rw  r  s    r   rw  MutableBox.unwrap_viewz$  r  r   c                6    U R                   R                  5       $ r   )rT  r>  r  s    r   r>  MutableBox.is_input_buffer}$      yy((**r   c                6    U R                   R                  5       $ r   )rT  rQ  r  s    r   rQ  MutableBox.freeze_layout$  s    yy&&((r   c                8    U R                   R                  X5      $ r   )rT  rV  rT  s      r   rV  *MutableBox.freeze_layout_with_stride_order$  s     yy88NNr   c                8    U R                   R                  U5      $ r   )rT  r[  rZ  s     r   r[  (MutableBox.freeze_layout_with_fill_order$  s    yy66u==r   c                8    U R                   R                  U5      $ r   )rT  r_  r^  s     r   r_  (MutableBox.freeze_layout_with_same_order$  s    yy66v>>r   c                8    U R                   R                  X5      $ r   )rT  rd  rb  s      r   rd  +MutableBox.freeze_layout_with_exact_strides$  s     yy99-WWr   c                6    U R                   R                  5       $ r   )rT  rg  r  s    r   rg  MutableBox.get_read_writes$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_reads$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   ro  MutableBox.num_reads$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rr  MutableBox.get_storage_numel$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   ry  MutableBox.get_reduction_type$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r|  MutableBox.get_reduction_size$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.is_extern$  r  r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  MutableBox.is_no_op$  r  r   c                8    U R                   R                  U5      $ r   r	  r#  s     r   r  MutableBox.constant_to_device$  s    yy++F33r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  MutableBox.get_mutation_names$  r  r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  MutableBox.get_operation_name$  r  r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  'MutableBox.get_inputs_that_alias_output$  s    yy5577r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.realize$  r  r   c                8    U R                   R                  U5      $ r   r  ru  s     r   rv  MutableBox.get_free_symbol_uses$  s     yy--m<<r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_read_names$  r  r   c                6    U R                   R                  5       $ r   )rT  r  r  s    r   r  MutableBox.get_defining_op$  r  r   c                8    U R                   R                  U5      $ r   )rT  r  r  s     r   r  MutableBox.codegen_reference$  s    yy**622r   c                6    U R                   R                  5       $ r   rT  r  r  s    r   rU  MutableBox.layout$  s     yy((**r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_layout$  r  r   c                6    U R                   R                  5       $ r   rD  r  s    r   r  MutableBox.get_output_spec$  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_size$  r  r   c                .    U R                   R                  $ r   )rT  r
  r  s    r   r
  MutableBox.dtype$  s    yyr   c                ~   [        U R                  [        5      (       aQ  [        U 5      R                   S[        U R                  5      R                   S3nSnU R                  R                  nO&[        U 5      R                   S3nU R                  nSnU[        [        U5      5      U/nSR                  U5      $ )Nr  z))r"  r  )r   rT  r  r   r   r  r   r  )r  line0endlr  r  s        r   r!  MutableBox.__str__$  s    dii,,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   r   r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )1r   r   r   r   r  r   r(  r  r-  r1  r:  rz  rC  rH  rL  rw  r>  rQ  rV  r[  r_  rd  rg  r  ro  rr  ry  r|  r  r  r  r  r  r  r  r^   rv  r  r  r  r  rU  r  r  r  r
  r!  rk  r   r   r   r   r  r  W$  sb    L2&'(&$7+('+) ;@O"O37O	O
>? HMX/X@DX	X
+%%-..%$4..8# L)$)=!=	!= *=
*+3 + +&+$   " Hr   r  c                  d    \ rS rSr\\SS j5       5       r\\SS j5       5       r\S	S j5       rSrg)
r   i$  c                    g r   r   rT  s    r   r  TensorBox.create$  s    FIr   c                    g r   r   rT  s    r   r  rU  $  s    +.r   c                X    [        U [        5      (       a  U $ [        [        U 5      5      $ r   )r   r   r   rx  rT  s    r   r  rU  $  s%    d122KD)**r   r   N)rT  r   r   r   )rT  r   r   r   )rT  r   )r   r   r   r   r   r  r  r   r   r   r   r   r   $  s@    I  I.  .+ +r   c                  r    \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSrg)rx  i$  z/
StorageBox allow in-place mutation of Tensors
c                    [        U R                  [        [        45      (       a5  U R                  R	                  5       [
        R                  R                  ;   $ gr'  )r   rT  r  rZ  rz  rs   r  r=  r  s    r   r>  StorageBox.is_input_buffer%  s=    dii+!?@@99%%'177+?+???r   c                    [        U R                  [        5      =(       a5    U R                  R                  5       [        R
                  R                  ;   $ r   )r   rT  r  rz  rs   r  r
  r  s    r   r  StorageBox.is_module_buffer%  s9    tyy>3 :		""$(9(99	
r   c           
         [         R                  U R                  5      (       a  U R                  R                  5       $ [	        U R                  [
        [        [        [        45      (       d   [        U R                  5      5       eU R                  R                  5       nU R                  R                  5       nU R                  R                  5       nUc   e[        S [        UU R                  R                  5       U R                  R!                  5       SS9U R                  S9U l        ["        R$                  R'                  U R                  5      U R                  l        ["        R$                  R+                  U R                  5        U R,                  U R                  l        XR                  l        X R                  l        U R                  R                  R2                  U R                  l        U R                  R(                  $ )NF)r  r
  r  rY  r  )r   r  rT  rz  r   ru  r  r  rg  r   r  r  r  r  r  r  r  rs   r  r	  r   r	  r  r  r  r  )r  r  r  r  s       r   r  StorageBox.realize%  sl   ""499--99%%''$))iD$%GHH 	
$IIK
 	
H ii//1II++-	%%'!!!"!ii))+YY'')	 	
	 00;			""499- LL		 +		'		#yy~~88		yy~~r   c                    [        U R                  [        [        45      (       a:  U R                  R	                  5       R
                  S:  a  U R                  5         ggg)z<
Called on buffers we expect to be forced to realize later.
rE   N)r   rT  ru  r  rD  nontrivial_read_countr  r  s    r   rL  StorageBox.realize_hint,%  sI    
 tyy9i"899		**,BBQFLLN G :r   c                <   SSK Jn  U R                  5        Vs/ s H1  nU" U5      (       a  M  [        R                  R                  U5      PM3     nnU(       d  g[        U5      n[        U5      n[        U5      nXQ:  =(       a    XV-  S:  =(       a    Xg:H  $ s  snf )Nr   )is_nonfreeable_buffersFr   )	r
  rc  r  rs   r  get_dep_size_hintr  rM  r  )r  rB  rc  r  size_of_reads
total_sizemax_sizemin_sizes           r   $has_accumulated_enough_reads_by_size/StorageBox.has_accumulated_enough_reads_by_size6%  s    @ ~~'
')#. +AGG%%c*' 	 

 '
}%}%# %%*%$	

s
   B#Bc                2   [        U R                  [        5      =(       aw    U R                  5       [        R
                  :  =(       dO    U R                  5       =(       d8    [        R                  S L=(       a    U R                  [        R                  5      $ r   )	r   rT  ru  ro  rF   realize_acc_reads_thresholdrC   realize_acc_reads_size_thresholdri  r  s    r   r(  !StorageBox.has_exceeded_max_readsI%  sq    $))Y/ 	
NNvAAA &&( 77tC ==;;		
r   c                r  ^ US:  a  [        U R                  [        [        45      (       a  [	        U R                  5      (       a9  U R                  R                  5       mSS/n[        U4S jU 5       5      (       a  gU R                  5       [        R                  :  =(       d    U R                  5       $ g)zR
A heuristic to decide if we should realize a tensor
that is used multiple times.
rE   expsigmoidc              3  @   >#    U  H  oTR                   ;   v   M     g 7fr   )used_ops)r   r   opcounts     r   r   5StorageBox.should_realize_on_reuse.<locals>.<genexpr>_%  s     @iG,,,is   TF)r   rT  ru  r  r1  rD  r  ro  rF   realize_reads_thresholdrC  )r  rG  	heavy_opsrt  s      @r   should_realize_on_reuse"StorageBox.should_realize_on_reuseU%  s    
 19DII	9/EFFdii  ))446"I.	@i@@@ 6#A#AA -**, r   c                R    U R                  U5      (       a  U R                  5         g g r   )rx  r  rF  s     r   rH  StorageBox.mark_reuseg%  s!    ''..LLN /r   c                6    U R                   R                  5       $ r   r  r  s    r   ro  StorageBox.num_readsk%  r  r   rT  Nr  r  r  )rB  r   r   r   )rG  r   r   r   r  r  )r   r   r   r   r  r>  r  r  rL  ri  r(  rx  rH  ro  r   r   r   r   rx  rx  $  s4    

<
&

$%r   rx  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
Subgraphio%  r   r   r  graph_moduleNzGraphLowering | Noner  r   )r   r   r   r   r   r  r   r   r   r   r  r  o%  s    
I&&"&E&r   r  c                    U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     n n[        [	        S U  5       5      5      [        U 5      :  $ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   )r\	  )r   r  s     r   r   '_has_aliased_buffers.<locals>.<genexpr>|%  s     ;7"V**7r  )r   rZ  rw  r   r>   )buffersr  s     r   _has_aliased_buffersr  v%  sd     F !+6? C CO  
 z;7;;<s7|KKs   1Ac                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'           SU 4S	 jjr	SS
 jr
\      SS j5       rSS jrSrU =r$ )InvokeSubgraphi%  z&
Ir node for the invoke_subgraph HOP.
NSubgraph | Noner  Sequence[IRNode] | Noneoperandsr	  c                   > [         TU ]  S UUS9  Xl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r|
  )r  r  r  rs   r  r	  r   r	  )r  r  r  rU  r  s       r   r  InvokeSubgraph.__init__%  sO     	 	 	

 !GG++D1		""4(r   c                B    U R                   (       a  U R                   /$ / $ r   )r  r  s    r   r  InvokeSubgraph.get_subgraphs%  s    "&--7R7r   c                8  ^ SSK Jn  [        R                  R                  nSnUR
                  R                  S5      =n(       at  SnUR                  [        R                  R                  R                  L a8  UR                  S   [        R                  R                  R                  L d   eSnUS   US nOSnUR                  [        R                  R                  R                  L a8  UR                  S   [        R                  R                  R                  L d   eSnUR                  US nU V	s/ s H  oR
                  S	   PM     nn	U V	s/ s H  oR                  U	5      PM     nn	/ n
[        U5       HR  u  p[!        U["        [$        [&        45      (       a  U
R)                  U5        M8  U
R)                  U" XU   5      5        MT     U
nUR                  cz  [        R                  R+                  UR,                  UUR.                  S
9Ul        [        R0                  " UR                  5         UR                  R2                  " U6   SSS5        UR                  R4                  nSnU H*  n[!        U["        5      (       a  M  UR7                  5       n  O   Uc   e[9        UU[;        US9S9m      SU4S jjn[        U5       VVs/ s H  u  nnU" UU5      PM     nnnUTl        U$ s  sn	f s  sn	f ! , (       d  f       N= fs  snnf )z|For each operand, get a realized input, force it to have the same
strides as the subgraph inputs, then use an InvokeSubgraphrE   )constrain_to_fake_tensorNeager_input_valsr   r   r   r   rY  rk  r  r  r%  )r  r  rU  c                Z  > [        U [        [        45      (       a  U $ U R                  5       nUc   e[	        [        UU R                  5       U R                  5       U R                  5       U R                  5       R                  U R                  5       R                  S9T[        U4/SS9$ )Nr  Tr  )r   r   ru  r  rR	  rW  r  r  r:  r  rX  rY  r   )rc  indr  invoke_subgraphs      r   create_output,InvokeSubgraph.create.<locals>.create_output%  s     &#8:N"OPP**,)))"%$..0#__.%002%00299"("3"3"5"?"? $C[M6: r   )rc  r   r  r   r   z:ShapeAsConstantBuffer | NoneAsConstantBuffer | MultiOutput)r  r  rs   r  rR
  ri  r  r	  r  rq   r  r
  r   r  rS	  r   r   r   r
  r
  r  r  r  r   r  r  graph_outputsr  r  r$	  r	  )r1  r  r  r  rR
  fake_operandsr  rX  fx_operandsr   new_operandsr   operandr	  r  r  r   rc  outsr  s                      @r   r  InvokeSubgraph.create%  s    	7 ww+++00445GHHHF""eii&<&<&I&II#((+uyy/E/E/U/UUUU,Q/8MF""eii&<&<&I&II#((+uyy/E/E/U/UUUU '++FG4K4?@KqVVE]KM@ AI!I1"3"3A"6!I%'%h/LC/ARS  ##G,##,WC6HI 0  >>!WW22((,&mm 3 HN
 $$X^^4""M2 5 .... Gg'<== ++-   !!!($F3
		!$	G	. ;DG:LM:LYQfa(:LM"&S A "J. 54T Ns   8K;L <LL
Lc                &    UR                  U 5        g r   )codegen_invoke_subgraphr
  s     r   r
  InvokeSubgraph.codegen&  r  r   )r   r  )r  r  r  rm	  rU  r$	  r   r   r  )r  r  r  r   r   z@list[ShapeAsConstantBuffer | NoneAsConstantBuffer | MultiOutput]rn  )r   r   r   r   r  r  r   r  r	  r  r  rl  r  r
  r   r  r  s   @r   r  r  %  s     !%Ho$(,H%,'+G$+
) 
),<
)FW
)	
)8 ff,2f	If fP. .r   r  c                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S	'   Sr	S\S
'   Sr
S\S'                 SU 4S jjrSS jr\SS j5       r\          SS j5       rSS jrSS jrSrU =r$ )Conditionali&  a  
IR node representing torch.cond

Attributes:
    predicate: A boolean scalar tensor determining which branch to execute.
    operands: Input tensors passed to both true and false subgraphs.
    true_subgraph: Subgraph executed when predicate is True.
    false_subgraph: Subgraph executed when predicate is False.
    outputs: MultiOutput nodes representing the conditional's outputs.
Nr  	predicater  r  r  true_subgraphfalse_subgraphSequence[MultiOutput] | Noner	  c                  > Xl         X l        X0l        X@l        [	        U/UQ5      u  px[
        T	U ]  S UUUS9  Ub  X`l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g N)r   rU  rq  rn
  )r  r  r  r  _split_by_sym_typer  r  rz
  rs   r  r	  r   r	  )
r  r  r  r  r  rU  rz
  sym_argsr
  r  s
            r   r  Conditional.__init__&  s     # *, 2I3I3I J"	 	 	
 (%6"GG++D1		""4(r   c                    / nU R                   (       a  UR                  U R                   5        U R                  (       a  UR                  U R                  5        U$ r   )r  r  r  r  	subgraphss     r   r  Conditional.get_subgraphs4&  sG    	T//0T001r   c                \    [        U [        5      (       a  U $ U R                  R                  $ r   )r   r   r   rT  )r   s    r   _maybe_exprConditional._maybe_expr<&  s"    aHvv{{r   c                B   U R                  U5      nU Vs/ s H  oPR                  U5      PM     nn[        R                  R                  R                  S   n[        U[        5      (       d   [        U5      5       e/ nU HI  n[        U[        5      (       a   UR                  UR                  S   5        M8  UR                  U5        MK     [        R                  R                  R                  S   n	      SS jn
X#4 H  nUR                  b  M  [        R                  R                  UR                  UUR                  S9Ul        [        R                  " UR                  5         UR                  R                  " U6   U
" UR                  R                   U	5      UR                  l        SSS5        M     UR                  c   eUR                  c   eUR                  R                   nUR                  R                   nSU4SU44 H&  u  p[#        U5      (       d  M  [%        SU S	U 35      e   ['        U5      ['        U5      :X  d   X45       e[)        [+        X5      5       H  u  nu  nnUR-                  5       UR-                  5       :X  d
   UUU45       eUR/                  5       UR/                  5       :X  d
   UUU45       eUR1                  5       R2                  UR1                  5       R2                  :X  a  M   UUU45       e   [5        S
 XA/-    5       5      n[7        [        R                  R8                  R:                  [        R                  R                  R                  R=                  SS5      5      nUc   S5       e[?        UUUU[A        US9US9n[)        [+        U[        R                  R                  R                  S   5      5       VVVVs/ s H  u  nu  nn[C        [E        UR-                  5       b  UR-                  5       OUUR/                  5       URG                  5        Vs/ s H  n[>        RI                  U5      PM     snURK                  5        Vs/ s H  n[>        RI                  U5      PM     snUR1                  5       R2                  UR1                  5       RL                  S9U[N        U4/5      PM     nnnnnUUl(        SSK)J*n  U" UR                  5      u      nnnU" UR                  5      u      nnn[W        U5      [W        U5      -  n[Y        U5       Vs/ s H   n[[        UU   R\                  UU   U5      PM"     snUl/        U$ s  snf ! , (       d  f       GM  = fs  snf s  snf s  snnnnf s  snf )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)r8  rY  c           	         / n[        X5       Hh  u  p4[        U[        5      (       a  UR                  U5        M-  UR                  [        R                  [        U5      UR                  5       SS95        Mj     U$ NFr  )r   r   r   r  r  r  r   r	  )r  fake_tensorsretrc  r8	  s        r   _require_exact_strides2Conditional.create.<locals>._require_exact_strides^&  sm     C #M @f&;<<JJv&JJ$::%f-t{{}E ; 	 !A Jr   Nr  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  p   #    U  H,  n[        U[        5      (       a  M  UR                  5       v   M.     g 7fr   )r   r   r  )r   os     r   r   %Conditional.create.<locals>.<genexpr>&  s,      
+a!67 ALLNN+s   66rz
  zcannot determine devicer%  )r  r  r  r  rU  rz
  r  r   )-check_input_alias_and_mutation_return_outputs)r  rm	  r  zSequence[torch.Tensor]r   r   )0rS	  rs   r  rR
  r   r   r   r   r<   r  ri  r  r  r   r  r  r  r  r(  r   r   r   r  r  r  rX  rl  r9   r  r   r  r  r$	  rR	  rW  r  r  r	  rY  r   r	  torch._higher_order_ops.utilsr  r>   r  r	  rU  r	  )r1  r  r  r  r  r   r  r  fx_opfake_outputsr  r  true_outputsfalse_outputsr   r	  r   t_of_or  rz
  conditionalrc  merged_outputr  r  r   true_mutated_inputsfalse_mutated_inputsmutated_operand_indicesr   s                                  r   r  Conditional.createB&  sP    %%i0	2:;(Q%%a((; ! 4 4 9 9" =+x00C${2CC0 $& E%&&$$UZZ%67 $$U+ ! ww++007	+	0	 	$ !+H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&6 4J 44l4HNN0 98 ," }}(((~~)))}}22 44(,7*m9TUMD#L11$**./TU\T]_  V < C$66U8UU6&s<'GHMAzS>>#s~~'77F!S#F7==?cmmo5D3}D5>>#**cnn.>.E.EET3PS}TE I  
+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
: /8L!''"6"6";";E"BC/)
(/**FM'  ((*6 ",,. **,@M@R@R@TU@T"+11"5@TU>K>R>R>T>T//3>T ",,.55$//1;; &/) 	 
2 &	

 :':N:NO 	*Aq%q :(:O:OP 	+Aq& #--@"AJ E
 #
 56(
6 8C=//#L6(
$
 [ <T 98v V
V(
s>   U2.AU7(AV
V
 V
4VAV
'V7
V	

V
c           	         UR                  U 5        UR                  U R                  5       U R                  [	        U S0 5      5        g r8  )codegen_conditionalr9  rz  r	  r   r
  s     r   r
  Conditional.codegen&  s9    ##D)88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r8  r=  r>  s      r   r  $Conditional.get_unbacked_symbol_defs&  rA  r   )r  r   r  r  r  rz
  )r  r   r  rm	  r  r  r  r  rU  r$	  rz
  r  r   r   r  )r   zint | torch.SymIntr   zint | sympy.Expr)
r  r   r  r  r  r  r  rh	  r   zlist[MultiOutput]rn  r  )r   r   r   r   r  r  r   r  r  r  r	  r  r  r  r  rl  r  r
  r  r   r  r  s   @r   r  r  &  s    	  $I}#(,H%,%)M?)&*NO*,0G)0)) #)  	)
 !) ") E) 
)8  
 XX X 	X
 "X 
X Xt
   r   r  c                    / n/ nU  HF  n[        U[        5      (       a  UR                  UR                  5        M5  UR                  U5        MH     X!4$ r   )r   r   r  rT  )r   non_sym_argsr  rN
  s       r   r  r  &  sO     LHc011OOCHH%$	  !!r   c                  
  ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S
\S'                   SU 4S jjrSS jr\SS j5       r\SS j5       r\            SS j5       rSS jrSS jrSrU =r$ )	WhileLoopi&  zSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.Nr  carried_inputsadditional_inputsr  cond_subgraphbody_subgraphr  r	  c                  > Xl         X l        X0l        X@l        [	        / UQUQ5      u  p[
        T
U ]  S UU	US9  Ub  X`l        Xpl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r  )r  r  r  r  r  r  r  rz
  stack_outputrs   r  r	  r   r	  )r  r  r  r  r  rU  rz
  r  r  r
  r  s             r   r  WhileLoop.__init__'  s     -!2** 21n101!
 	"	 	 	
 (%6"(GG++D1		""4(r   c                    / nU R                   (       a  UR                  U R                   5        U R                  (       a  UR                  U R                  5        U$ r   )r  r  r  r  s     r   r  WhileLoop.get_subgraphs%'  sG    	T//0T//0r   c                   [        U 5      (       d  U $ U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     nn[	        5       n/ n[        X5       He  u  pV[        U5      U;   a&  UR                  [        R                  U5      5        M:  UR                  [        U5      5        UR                  U5        Mg     U$ s  snf r   )r  r   rZ  rw  r>   r   r\	  r  r  r
  r  )r  r  unwrapped_buffersseen_buffersr  original_inputunwrapped_buffers          r   _clone_aliased_inputsWhileLoop._clone_aliased_inputs0'  s    #N33!! )
( %/v$G$GF VS( 	 
 )3+-03N0V,N"#|3l55nEF  $4!56n- 1W !
s   1Cc                    [        U [        5      (       a  U $ [        U [        [        45      (       a  [        U 5      $ [        U [        5      (       a  [        R                  U 5      $ [        S[        U 5       35      e)NzNYI unsupported output type: )r   r   rx  rZ  rR	  r  rl  r   )r   s    r   _maybe_wrap_as_tensor_box#WhileLoop._maybe_wrap_as_tensor_boxJ'  se    c9%%Jj/:;;S>![))##C((!>tCykJKKr   c                   SSK Jn        SS jn[        R                  R                  R
                  S   n[        R                  R                  R
                  S   n	X-   n
U
 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  S   PM     nnU	 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  U5      PM     nn[        R                  U5      nU" X5      nU Vs/ s H  oR                  U5      PM     nnU" UU5      nUU-   nX4 GH  nUR                  b  M  [        U
[        5      (       d   [        U
5      5       e[        R                  R                  UR                  U
UR                  S9Ul        [        R                   " UR                  5         UR                  R"                  " U6   UUL aZ  [%        UR                  R&                  5      [%        U5      :X  d   eU" UR                  R&                  U5      UR                  l        SSS5        GM     UR                  (       a  UR                  (       d   eUR                  R&                  nUR                  R&                  n[)        U5      (       a  [+        S	U 35      e[%        U5      S
:X  d   U5       eUS   n[        U[,        5      (       dM  UR/                  5       [0        R2                  :X  d   U5       e[%        UR5                  5       5      S:X  d   U5       e[%        U5      S:  d   S5       eUS   R7                  5       nUc   e[%        U5      [%        U5      :X  d	   UU45       e[9        [;        UU5      5       H  u  nu  nn      SS jnU" UR5                  5       UR5                  5       5        U" UR=                  5       UR=                  5       5        UR7                  5       UR7                  5       :X  d   UUUU45       eUR/                  5       UR/                  5       :X  a  M   UUU45       e   Uc   e[?        [        R                  R@                  RB                  [        R                  R                  R                  RE                  SS5      5      n[        UUUU[G        US9UUS9nUR                  b=  [        UR                  RH                  [0        RJ                  RL                  5      (       d   eU" UR                  RH                  U5      S   n[O        U5      nU Vs/ s H  nUU   PM
     n n[Q        U 5      n!/ n"/ Ul)        / Ul*        U(       Ga  [%        U5      S:X  d   S5       e[9        [        R                  R                  R                  S   5       H  u  nn#[W        [Y        U#RZ                  U#R\                  U#R_                  5        V$s/ s H  n$[`        Rc                  U$5      PM     sn$U#Re                  5        V%s/ s H  n%[`        Rc                  U%5      PM     sn%S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        M     GO[9        U5       GH  u  nn#UU;   ad  U[%        U5      :  d   S5       e[k        U!5      n'URT                  Ri                  [m        U'Rn                  U'U5      5        U"Ri                  U'5        Mq  [W        [Y        U#R7                  5       U#R/                  5       U#R5                  5       U#R=                  5       U#Rq                  5       Rr                  S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        GM     [;        UU"5       Hk  u  n(n)U(Ru                  5       [        R                  Rv                  ;   d  M4  [        R                  Rx                  R{                  U)Ru                  5       5        Mm     U"$ s  snf s  snf s  snf s  snf s  snf ! , (       d  f       GM   = fs  snf s  sn$f s  sn%f )zcreate the while_loop IR node. stack_output controls whether it stack
each iterations' output, which is necessary for training.
r   )check_input_alias_and_mutationc           	     V   [        U 5      [        U5      :X  d   e/ n[        X5       H}  u  p4[        U[        R                  5      (       aH  [
        R                  U5      nUR                  [        R                  XTR                  5       SS95        Ml  UR                  U5        M     U$ r  )r   r   r   r  r[	  r  r  r  r  r  r	  )tensor_boxesr  r  r2  fknew_tbs         r   r  0WhileLoop.create.<locals>._require_exact_stridesc'  s     |$L(9999Cl9b%,,// '@@DFJJ$::"IIKu ;  JJrN) :* Jr   r8  rY  Nr  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: rE   z9torch.while_loop is assumed to have at least one operand.c                    [        U 5      [        U5      :X  d   e[        X5       H.  u  p#[        R                  R                  R                  X#5        M0     g r   )r   r   rs   r  r  r^  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equals,WhileLoop.create.<locals>._guard_list_equals'  sC     9~Y777 #I 9HCGG$$11#; !:r   rz
  r%  )r  r  r  r  rU  rz
  r  r   z-NYI: while_loop_stack_output input mutations.r  zonly carries can be mutated.)r  r
  r  r	  rX  )r  rm	  r  z'list[int | torch.SymInt | torch.Tensor]r   r   )r  Sequence[int | sympy.Expr]r  r  r   r   )>r  r  rs   r  rR
  r   ri  rS	  r  r  r   r   r   r  r  r   r  r  r   r  r  r(  r   r  r  r   r  r  r   r   r:  r9   r  r   r  r$	  modulefxGraphModuler>   rm  r	  r	  rR	  rW  r  r
  r  r  r  r	  r   r  rl  r	  rU  r  rX  rz  r=  r  r  )*r1  cond_fnbody_fnr  r  r  r  r  fx_carried_inputsfx_additional_inputsfx_all_inputsr   fake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_
all_inputsr  cond_outputsbody_outputsr  r  r   re  bor  rz
  
while_loopmutated_idxsmutated_idx_setr   r	  mutated_inputs_iterall_outputsrc  r  r  r  mutated_inputrs  r   s*                                             r   r  WhileLoop.createU'  s    	Q	*	A	 	: GG0055b9 ww3388<)@2?@-Q66%=-@6GH6Gvve}6GH9M!N9MA&&-9M!N9GHA,,Q/H#99/J0V<MN<Mq//2<MN3 6
 %'99
 *H~~%!-::OD<OO:!"!6!6,,#0"*-- "7 "
 ((8NN&&8  7*"8>>#?#?@C/E      8N$NN88/84 98 +4 }}..}}22}}22-- XXdWeg  < A%3|3%O!233;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!?#s<'88 	
;
 	
8 %S,%GHKAxB<5<5< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3 I" !!!5GG&&GG  %%))*=tD

 *0!!$F3/%	

 }}(ZMM  %(("6"6.
 .
 	
 

 6MM  /

 %\25DE_c*S/_E #>2$&
&(
#'1, ?,  ))=)=)B)B5)IJV'%}}$llDJKKMRMbk55b9MRFLmmoVo 7 7 ;oV	 C[M		 "")))4""9-  K  )6V/)^!44T6TT4$()<$=M//66&}';';]JW  &&}5 +##)#4#4#6"("2"2"4!'!2#)#4#4#6#)#4#4#6#=#= #
!I &&--i8&&y1-  70 NK8HC||~!5!55 ++//? 9 U AH!NH O 98t F" SVs=   (`#`($`-`2?`7<A9`<a4a'a<
a	c           	         UR                  X R                  5        UR                  U R                  5       U R                  [        U S0 5      5        g r8  )codegen_while_loopr  r9  rz  r	  r   r
  s     r   r
  WhileLoop.codegen/(  s?    ""4):):;88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r8  r=  r>  s      r   r  "WhileLoop.get_unbacked_symbol_defs5(  rA  r   )r  r  r  r  r   r  rz
  )r  rm	  r  rm	  r  r  r  r  rU  r$	  rz
  r  r  r   r   r   r  )r  rm	  r   rm	  )r   r   r   r   )r  r  r  r  r  rm	  r  rm	  r  r   r   rp  rn  r  )r   r   r   r   r  r  r   r  r  r  r	  r  r  r  r  r  rl  r  r
  r  r   r  r  s   @r   r  r  &  s%   ].2N+215.5%)M?)%)M?),0G)0)() ,)  	)
  ) ") E) ) 
)>  2 L L WW W )	W
 ,W W 
#W Wr
   r   r  c                  r   ^  \ rS rSr SSS.               S	U 4S jjjjrS
U 4S jjrSS jrSrU =r$ )r   i@(  Nr  c          
        > [         T
U ]  UUUUUS US9  SSKJn  U" U5      n	U	c   eXl        [
        R                  R                  R                  U	S 5      U l	        U [
        R                  R                  U	'   g )Nr  r   )_get_effect)
r  r  torch._higher_order_ops.effectsr  effect_typers   r  effectful_opsr  prev_effect_buffer)r  rU  r
  r
  r.  r
  r   rz
  r  r  r  s             r   r  EffectfulKernel.__init__A(  s     	/ 	 	
 	@!&)&&&&"#''"7"7";";K"N-1k*r   c                   > [         TU ]  5       nU R                  bG  UR                  R	                  [
        R                  " U R                  R                  5       5      5        U$ r   )r  rg  r  rk  r  rG   r  rz  )r  r  r  s     r   rg  EffectfulKernel.get_read_writes^(  sU    g-/"".!!$$T%<%<%E%E%GH r   c                    gr  r   r  s    r   r   EffectfulKernel.has_side_effectsh(  r  r   )r  r  r   r  r  r  )	r   r   r   r   r  rg  r  r   r  r  s   @r   r   r   @(  s|     )-2 HL22 2 &	2
 &2 +2 &2 E2 
2 2: r   r   c                  @    \ rS rSr\" S 5       S   SS jj5       rSrg)r
  il(  c                    [        5       $ r   r=   ru  s     r   rv  !NonTensorObj.get_free_symbol_usesm(  rT  r   r   Nr  r  )r   r   r   r   r^   rv  r   r   r   r   r
  r
  l(  s,    N+$)!	! ,r   r
  c                  `    \ rS rSr% S\S'   S\S'   SS jrSSS jjrSS	 jrSS
 jrSS jr	Sr
g)r3
  it(  r   r   %FakeScriptObject | torch.ScriptObjectr  c                    U R                   $ r   r  r  s    r   rz  TorchBindObject.get_namey(  r>  r   Nc                    U R                   $ r   r  r  s     r   r  !TorchBindObject.codegen_reference|(  r>  r   c                    U R                   $ r   r  r  s    r   r
  TorchBindObject.get_value(  r  r   c                    [        U R                  [        R                  5      (       a  U R                  $ U R                  R                  $ r   )r   r  r  rP  real_objr  s    r   get_real_objTorchBindObject.get_real_obj(  s3    djj%"4"455::::&&&r   c                   U R                  5       n[        U5      (       a  g[        US5      (       d   e[        UR	                  5       5      n[
        R                  " U5      S   nU Vs/ s HE  n[        U[        R                  5      (       d  M$  UR                  5       UR                  5       -  PMG     nn[        R                  " [        R                  US5      $ s  snf )Nr   __obj_flatten__)r0  r*   r  r   r3  r
  r
  r   r  r[	  r  numelr"  rE  operatorr  )r  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         r   get_buf_bytesTorchBindObject.get_buf_bytes(  s    ++-?++(9::::88:;	((3A6
  
!U\\* )ANNqwwy( 	 

 j!<<
s   +#C%Cr   r  r   r  )r   r'  )r   ztorch.ScriptObjectr  )r   r   r   r   r   rz  r  r
  r0  r:  r   r   r   r   r3
  r3
  t(  s&    
I00'=r   r3
  c                  <    \ rS rSr% SrS\S'   S	S jrS
SS jjrSrg)OpaqueValueTypeConstanti(  a  IR node for opaque value type constants that appear directly in graph outputs.

Unlike TorchBindObject (which references named constants loaded at runtime),
this inlines the value's repr into the generated code since value types are
reconstructed from their repr.
r   r  c                ,    [        U R                  5      $ r   )rQ  r  r  s    r   rz   OpaqueValueTypeConstant.get_name(  s    DJJr   Nc                    [        U R                  5      u  p#UR                  5        H"  u  pEU[        R                  R
                  U'   M$     U$ r   )r)   r  r  rs   r  opaque_value_type_classes)r  r  obj_repropaque_typesr9  r  s         r   r  )OpaqueValueTypeConstant.codegen_reference(  sA    !4TZZ!@ &&(DA34AGG--a0 )r   r   r  r   r  	r   r   r   r   r  r   rz  r  r   r   r   r   r=  r=  (  s     J  r   r=  c                  B    \ rS rSr% S\S'   S\S'   S
S jrSSS jjrS	rg)r
  i(  r   r   r  r  c                    U R                   $ r   r  r  s    r   rz  GeneratorState.get_name(  r>  r   Nc                    U R                   $ r   r  r  s     r   r   GeneratorState.codegen_reference(  r>  r   r   r  r   r  )r   r   r   r   r   rz  r  r   r   r   r   r
  r
  (  s    
I r   r
  c                  F    \ rS rSr% SrS\S'   S\S'   SS jrSSS	 jjrS
rg)r
  i(  z
Represents an opaque object (e.g., ProcessGroup) that is passed through
as a graph input. Similar to GeneratorState, this wraps the object with
its placeholder name so codegen can reference it properly.
r   r   r   r  c                    U R                   $ r   r  r  s    r   rz  OpaqueObjectState.get_name(  r>  r   Nc                    U R                   $ r   r  r  s     r   r  #OpaqueObjectState.codegen_reference(  r>  r   r   r  r   r  rE  r   r   r   r
  r
  (  s"     IJ r   r
  c                      \ rS rSrS	S jrS	S jrS
SS jjr\          SS j5       r\          SS j5       r	Sr
g)_CollectiveKerneli(  c                    gr'  r   r  s    r   rZ  !_CollectiveKernel.should_allocate(  r*  r   c                    gr  r   r  s    r   r  "_CollectiveKernel.has_side_effects(  r  r   Nc                n   [        U R                  5      [        R                  R                  L d   S5       eU R                  nUb  Xl        OUR                  R                  U l        UR                  R                   Vs/ s H!  o3R                  (       d  M  UR                  PM#     snU l
        g s  snf )Nz,Setting cpp kernel needs a valid op_overload)r   rv
  r  r
  r
  rr
  r
  r   r
  r
  rt
  )r  rr
  r
  r   s       r   r}
  %_CollectiveKernel.set_cpp_kernel_name(  s    D$$%)>)>> 	
:	
> !!&#2 #)>>#6#6D  #NN44.
4qFAFF4.
* .
s   B2B2c                   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H@  n
U
R	                  5         [         R                  R                  U
R                  5       5        MB     US   R                  5       nU " [        US9UUWW5      n[        R                  " U5      nUR                  R                  U Vs/ s H  n[        [        US9X5      PM     sn5        UR                  R                  U Vs/ s H  oR                  5       PM     sn5        SU;   a]  UR                  R                  [        [        US9US   U5      5        UR                  R                  US   R                  5       5        g g ! , (       d  f       GN~= fs  snf s  snf )Nr!  r   r%  r   )rs   r  r9  r
  r  r  rz  r  r  r
  tree_leavesr	  r  r	  r  r  )r1  r
  rq  r   r   _example_outputr
  r
  r
  rz
  
tensor_argr  r  inpsr  rs  s                   r   create_inplace _CollectiveKernel.create_inplace(  s    WW ""6CDCFC!  %E2C1D&EE$%J GG''
(;(;(=> & Q**,f%
 !!&)&&OSTt^Jf5sCtT	

 	!!T"BTc<<>T"BCF?##**z8&-P %%fUm&<&<&>? ; 0 U #Cs   F/.G+G/
F>c           
     B   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H*  n
[	        U
[
        5      (       a  M  U
R                  5         M,     [	        W[        5      (       a  U R                  Xe5      nUc   eU " [        US9UUWW5      n[        U5       VVs/ s H(  u  p[        U R                  U5      U[        U4/5      PM*     snnUl        [        UR                  U5       H_  u  p[        R                   (       d  [#        U5      (       a  M,  [         R                  R$                  R'                  UR(                  5        Ma     UR                  $ U " U R                  U5      UUWW5      n[        R                   (       d  [#        U5      (       d3  [         R                  R$                  R'                  UR(                  5        U/Ul        U$ ! , (       d  f       GN= fs  snnf )Nr  r%  )rs   r  r9  r
  r   r3
  r  r   rN  r$	  r   rR	  r^	  r	  r   rF   r  rp   r  r  r   )r1  r
  rq  r   r   r
  r
  r
  r
  rz
  r[  r  r  r   r[  r  s                   r   create_out_of_place%_CollectiveKernel.create_out_of_place,)  s    WW ""6CDCFC!  %F3D2E&FF$%Jj/::""$ & nd++__[AF%%%!0F "+>!: ";IA ((0AYK
 ";FN  #6>>>B::BSC C GG--11#((;	  C
 >>!$$^4F 66>O? ? ))--fkk:$XFNMe .s   H	/H	
H)rr
  rt
  r  r   ro  )
r
  r   rq  zIRNode | list[IRNode]r   r   r   r   r   r   )
r
  r   rq  zTensorBox | list[TensorBox]r   r   r   r   r   z%list[MultiOutput] | _CollectiveKernel)r   r   r   r   rZ  r  r}
  rl  r]  r`  r   r   r   r   rQ  rQ  (  s    

( )@)@ &)@ 	)@
 )@ 
)@ )@B 99 ,9 	9
 9 
/9 9r   rQ  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduce_Kernelii)  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_all_reduce_r  r  r}
  r  s	           r   r  _AllReduce_Kernel.__init__j)  =     	/ 	 	
 	  !NOr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr  r   rU  r  rL  r
  s     r   r
  _AllReduce_Kernel.codegen)  C    $$%RS,,T2dkk6**%%g. +r   r   r   r  rn  r  r  s   @r   rc  rc  i)  s     )-P HLPP P &	P
 &P +P &P EP 
P P,/ /r   rc  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduceKerneli)  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  *aoti_torch_cpu__c10d_functional_all_reducerf  r  s	           r   r  _AllReduceKernel.__init__)  s=     	/ 	 	
 	  !MNr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g rj  rk  r
  s     r   r
  _AllReduceKernel.codegen)  rn  r   r   r   r  rn  r  r  s   @r   rp  rp  )  s     )-O HLOO O &	O
 &O +O &O EO 
O O,/ /r   rp  c                     ^  \ rS rSr S
SS.               SU 4S jjjjrSS jrSS jr\SS j5       rSU 4S jjr	S	r
U =r$ )_WaitKerneli)  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_wait_tensorrf  r  s	           r   r  _WaitKernel.__init__)  rh  r   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g rj  rk  r
  s     r   r
  _WaitKernel.codegen)  rn  r   c                   U R                   S   n[        U[        5      (       d   e[        U[        5      (       a7  UR                   S   n[        U[        5      (       d   [	        U5      5       eU/$ [        U[
        5      (       aG  UR                   S   n[        U[        5      (       a!  UR                  S   u  pEUR                   U   /$ / $ / $ r  )rq  r   r   rQ  r   rR	  r  )r  rs  r   collr   r   s         r   get_volatile_reads_WaitKernel.get_volatile_reads)  s    kk!n#v&&&&c,--

1Aa((1$q'1(3J[)) ::a=D$ 122QC())I Ir   c                v   [         R                  R                     U R                  X5      u  nnnnnS S S 5        W(       a   U SU 35       eU " [	        UR                  5       S9UWWW5      nUR                  R                  [        [	        UR                  5       S9X(5      5        g ! , (       d  f       N}= f)Nr!  r%  )	rs   r  r9  r
  r  r  r	  r  r	  )	r1  r
  rs  rZ  r
  r
  r
  rz
  r  s	            r   create_wait_WaitKernel.create_wait)  s    WW ""6/!  %E2C1D&EE$cnn./
 	&&:S^^-=>L	
! s   B**
B8c                   > [         TU ]  5       nU R                  5       nU H@  nUR                  R	                  [
        R                  " UR                  5       5      5        MB     U$ r   )r  rg  r  rk  r  rG   r  rz  )r  r  volatile_readsvrr  s       r   rg  _WaitKernel.get_read_writes)  sS    g-/002 B!!,"6"6r{{}"EF !r   r   r   r  rn  r  )r
  r   rs  r   r   r   r  )r   r   r   r   r  r
  r  rl  r  rg  r   r  r  s   @r   rw  rw  )  s     )-P HLPP P &	P
 &P +P &P EP 
P P,/2 
 
* r   rw  c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r;   r   r4   r   r   r>   r   r!   re  r  r[	  r   r3  r  s      r   re  re   *  s    !h%&&$Q''	At}	%	%u||$&A,Q//A 	Au||	$	$$Q''|r   c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r;   r   r3   r   r   r>   r   r!   rf  r  r[	  r  s      r   rf  rf  *  s    !h%&&A	At}	%	%u||$&A#A&&A 	Au||	$	$A|r   c                D   [        U [        5      (       Ga
  [        U R                  [        5      (       Ga  [        U R                  R                  [        5      (       a'  U R                  R                  R                  SU5        g [        U R                  R                  [        5      (       Gan  U R                  R                  R                  SU5        [        U R                  R                  [        5      (       ad  [        U R                  R                  R                  [        5      (       a1  U R                  R                  R                  R                  SU5        g [        U R                  R                  [        5      (       a  U R                  R                  R                  (       dk  [        U R                  R                  R                  S   [        5      (       a4  U R                  R                  R                  S   R                  SU5        g g g g g g g )Nr  r   )r   r   rT  rx  r  r  ry  r  rR	  r  rq  )r  r9  s     r   assign_origin_noder   *  sq    &)$$FKK)L)Lfkk&&..KK//qA((&11KK//qA&++**N;;
  %%uA A   %%88J 6;;++[99((00fkk..55a8&AAKK$$++A.AA-QRS B 1 : 2 *M$r   )r   r   r   zTypeIs[int | Integer])r   r   r   r   )r   r   r   r  )r   r  r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   r  r   )r   z#Sequence[int | torch.SymInt | Expr]r   zShapeEnv | Noner   r  )r   Sequence[int | Integer]r   r  r  )r   r   r  r   r   r   )r   r   r  r   r   r  )r   r  r  r   r   ztorch.Tensor | None)r  zSequence[_T] | Noner   zSequence[_T | None] | None)r   z/IRNode | OutputSpec | torch.device | None | strr   r  )r   z"IRNode | torch.device | None | strr   r   )r   zBuffer | TensorBoxr6  r   r   r   )rE  r  rF  r  rG  r  r   r   )r[  r   r\  zSequence[int | torch.SymInt]r   r   )rk  r  r   r   )rq  rm	  r   r  )r   zExpr | Sequence[Expr]r
  r  r   rr   r  )r  r   r
  r  r  r   r   r  )r  r  r	  rp  rX  r   r   r  r   )TFNFN)r   r   r  r   r  r   r  r  rU  r   rc  r  r   ztuple[StorageBox, Layout])r   r   r  r  r   r   r  )r	  r  rG  r  r   r   )r
  r  r   r   )r   r#
  r   zTypeIs[Sequence[IRNode]])r  r  r   r   )r  rm	  r   r   )r   r  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   rj  )r  r   r9  ztorch.fx.Noder   r   (\  
__future__r   r  r  r  r"  rF  loggingr5  textwrapr  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittest.mockr   r   r   r    r!   torch._export.serde.schema_exportserder/  rf  rW  r*  r  r"  torch._loggingr  torch.fxtorch.utils._pytree_pytreer
  torch._dynamo.utilsr"   torch._export.serde.serializer#   *torch._higher_order_ops.auto_functionalizer$   torch._inductorr%   r
  r'   "torch._library.fake_class_registryr(   torch._library.opaque_objectr)   r*   torch._prims_commonr+   r,   r-   r.   r/   r0   %torch.fx.experimental.symbolic_shapesr1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   torch.fx.noder<   torch.utils._ordered_setr>   torch.utils._python_dispatchr?   torch.utils._sympy.functionsr@   rA   rB   rC   torch.utils._sympy.symbolrD   r  rF   rG   codegen.commonrH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   	loop_bodyrS   ops_handlerrT   rU   rV   rW   runtime.benchmarkingrX   runtime.hintsrY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   virtualizedrq   rr   rs   rt   ru   torch.typesrv   codegen.cutlass.templaterw   codegen.wrapperrx   r  ry   rz   r   r   r*  __version__r[  rZ  ImportErrorr{   r|   r}   r~   r   r   rr  r   r
  r
  r
  r   	getLoggerr   r'  r  r
  r   r   r   r   rI  r   r   r   r   r   r   r  r  r   r   r  r  r  r$  r/  r1  rB  rK  ra  rm  rt  r{  r   r  r  rs  ru  r  r  r  r  rT  r  r  r  r  r  r\  rg  r  r  rV  r  r  rv  r   r  r  r+  r  rZ  r  r  r  r  r  r	  r  r#  r  rW  r  r  r  r  r  r  ry  r]  r  ri  r  ru  r   r  r	  r  rq	  r   r   PrimitiveInfoTyper	  r	  r	  r	  r	  r	  r	  r!
  r  rA
  rG
  r  rs  r~  rd
  r	  r  r  r  r  r  rJ  ra  rr  r  r  r  r  r  r  r  r   r   r
  r]	  r  r  r$	  rR	  r   r  r  r  r  r   rx  r  r  r  r  r  r  r   r
  r3
  r=  r
  r
  rQ  rc  rp  rw  re  rf  r  r   r   r   <module>r     sM   "          M M :      U T   ' ' 2 2 , ,   $ $ ( ? M # 2 ? M      / ? Q Q * "     N N - :     0 * ) B&'95$% "(OY'''NJ t_T]T]T]Dj)  EkD() (**//%**2P2PPi P!			8??4	8yy~~'T  k	c4[();(JKdR	i 	) d#  $$ $F44 , ! $  LP	,9H	 LP
	,
9H

 
 U 
 U 
27+/ 
 :?26D
>6
>
>;('7*    
	$$G$G)$G $GN';S, S,l
 UU U Up F
F F
 F
R& %
 %
 %
P 
i 
 
F |$y!y!uu=)< 8  JN<N<N +<NBF<N<N~ d
 d
 d
R $(1:
   &	& "8D>8D>"BH"LMY M7S9 7St#1 #L`
+ `
F F
5 F
 F
T 	 	 	 [5 [ [|	 !3748999 9 1	9
 9 29 9x4	$ ^
v ^
 ^
B [ [ [| -( - -` A9( A9 A9H !( ! !H [; [ [| Sh S Sl & & &RlA lA^ 6  " K| K K$ S| S S'9	(<7 7  `
Z `
 `
F	C& C[HV [H|!Gf !GHT %{ %D   .V* V*r UEV] E EP U&fi & & & 
K 
[ 
& 6  ( F    Ut4_ t4 t4n d#  $]+_ ]+@<> <~ %K$&,tC#I4E4L/MM L L^"| "
\B. \B~5N 50( (6N >Z%N Z%z5.55 UR? R Rj K9 K\ UE< E EP U"l " "J
/ 
((" ("V
V 
B=L =@%
 %
P
- 
$<
\ <
~Wl Wt
))| ))Z/, /d< 8B5 B$8- 826)l 6)r))| ))X9T 9Tx,E ,E^21| 21j-L -8<;< <;~ U  
t	& t	n U
. 
 
@   2 
  I
, I
X 
  
F4K 4 2f> fV T T Tn+
 +n% n%b U'v ' 'L UB.\ B. B.J Ue , e  e P"
"2" U@  @  @ F
)n )X6  "=l "= "=J l  ( \     "Y Yx/) />/( />S# Sr  T TiL  NJs   &l6 6
mm