
    3j              	         S SK JrJr  S SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJr  S SKJr  S SKJr  S	S
KJ r   SSK!J!r!  SSKJ"r"  S	SK#J$r$J%r%J&r&J'r'  SSK(J)r)  S SK*J+r+J,r,J-r-  Sr.Sr/\" S5      r0 " S S\Rb                  5      r2S6S jr3 " S S5      r4S7S jr5 " S S\\0   5      r6S r7S r8S r9 " S S 5      r:0 r;\ " S! S"5      5       r<S# r=S$ r> " S% S&\:\6\0   5      r?\S8S' j5       r@\SSSSSSSS(.             S9S) jj5       r@ S:SSSSSSSS(.               S;S* jjjr@ " S+ S,5      rA " S- S.5      rBS/ rCS0 rD " S1 S2\:5      rE " S3 S4\:5      rFS5 rGg)<    )annotationsdivisionN)defaultdict)	dataclass)cached_property)	CallableGenericIterableOptionalTypeVaroverloadDictAnyTuple)BaseBackend)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictis_namedtuple)get_cache_key)get_cache_invalidating_env_varsnative_specialize_implirztriton.languagez"triton.experimental.gluon.languageTc                     ^  \ rS rSrSrSU 4S jjr\S 5       rS rS r	SS jr
S rS	 rS
 rS rS rS rS rS rS rSrU =r$ )DependenciesFinder"   a  
This AST visitor is used to find dependencies of a JITFunction. This can
be used to invalidate a JITFunction's hash when its source code -- or
that of its dependencies -- changes.

This visitor also keeps track of the global variables touched by the
JITFunction.  When we launch the kernel, we check that these have the same
values as they did when we ran this visitor.  If not, we raise an error (or
otherwise we could recompile).
c                   > [         TU ]  5         Xl        [        R                  " UR                  S5      5      U l        X l        X0l        1 SkU l	        [        [        SS1U l        0 U l        SU l        g )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr4   r9   r:   src	__class__s        L/home/wildlama/miniconda3/lib/python3.13/site-packages/triton/runtime/jit.pyr3   DependenciesFinder.__init__.   sn    	nnSZZ%89 "*
& 	"
" TV*/'    c                6    U R                   R                  5       $ N)r8   	hexdigestrA   s    rD   retDependenciesFinder.retY   s    {{$$&&rF   c                    [         R                  " UR                  5      (       a  g[        USS5      nUR	                  [
        5      $ )NT
__module__ )inspect	isbuiltinfuncr.   
startswithr=   )rA   noderR   modules       rD   _is_triton_builtin%DependenciesFinder._is_triton_builtin]   s9    TYY''|R0  //rF   c                F   [        U[        5      (       d   eU R                  R                  5       UR                  R                  5       -   H]  nUu  p4U R                  U   u  pTUR                  U   u  pdXV:w  d  M0  [	        SU SU SU R
                   SUR                   SU S35      e   U R                  R                  UR                  5        UR                  nU[        [        USS5      5      -  nU R                  R                  UR                  S	5      5        g )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr%   )r/   JITCallabler?   keysRuntimeErrorr4   __name__update	cache_keystrr.   r8   r7   )rA   rR   kvar_name_v1v2func_keys           rD   _update_hashDependenciesFinder._update_hashc   s&   $,,,, &&++-0E0E0J0J0LLAKH))!,EB))!,EBx"&xjB4?OPTPYPY{Zmnrn{n{m|  }T  UW  TX  XO  P  M 	$$T%:%:;>>Cj%8998??734rF   c                R   SSK Jn  Ub  [        U5      [        L a  g [	        USS5      (       a%  UR
                   H  nU R                  U5        M     g [	        USS5      (       a  g [	        USS5      S:X  a  g [        U[        5      (       a  U R                  U5        g [        U5      (       a3  [        U[        5      (       d  [        X5      (       d  [        S	U 35      eU R                  (       a  g Ub0  [        R                  " U5      U4U R                  U[!        U5      4'   g )
Nr   	constexpr__triton_aggregate__F__triton_builtin__rN   rO   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corerl   typer   r.   
hash_attrsrecord_referencer/   r[   rh   callabler]   r@   r0   deepcopyr?   id)rA   valvar_dictr4   rl   attrs         rD   rr   #DependenciesFinder.record_referenceu   s   - ;$s)z13.66%%d+ '3,e44 3b)-NNc;''c"C==C!6!6z#?Y?Y!B3%HII **;?==;Mx:XD!!4H"67rF   c                F  ^  [        UR                  5      [        R                  L a  UR                  $ UR                  T R
                  ;   a  g U 4S jnU" UR                  5      u  p4UR                  T R                  ;   a  U$ T R                  X4UR                  5        U$ )Nc                   > TR                   R                  U S 5      nUb  UTR                   4$ TR                  R                  U S 5      nUb  UTR                  4$ g)NNN)r9   getr:   )r4   rv   rA   s     rD   name_lookup2DependenciesFinder.visit_Name.<locals>.name_lookup   sZ    ,,""4.CDLL((..$$T40CDNN**rF   )rp   ctxastStoreru   local_namesr;   rr   )rA   rT   r~   rv   rw   s   `    rD   
visit_NameDependenciesFinder.visit_Name   s|    >SYY&77N77d&&&	 $DGG,77d444JcTWW5
rF   c                b    UR                    Vs/ s H  o R                  U5      PM     sn$ s  snf rH   )eltsvisit)rA   rT   elts      rD   visit_TupleDependenciesFinder.visit_Tuple   s&     ,09959C

39555s   ,c                ~   U R                  UR                  5      n[        U[        R                  5      (       a<  U R                  UR                  5      n[        U[        R                  5      (       a  M<  [        USS5      nUb  X0R                  ;   a  g [        X!R                  5      nU R                  U5        U$ )Nr^   rO   )	r   valuer/   r   	Attributer.   r>   rx   rr   )rA   rT   lhslhs_namerK   s        rD   visit_Attribute"DependenciesFinder.visit_Attribute   s    jj$cmm,,**SYY'C cmm,,3
B/;(&<&<<c99%c"
rF   c                    UR                   R                    Vs1 s H  o"R                  iM     snU l        U R                  U5        g s  snf rH   )argsargr   generic_visit)rA   rT   r   s      rD   visit_FunctionDef$DependenciesFinder.visit_FunctionDef   s6    /3yy~~>~GG~>4  ?s   Ac                  ^  U 4S jn[         R                  " UR                  UR                  UR                  (       a  UR                  /O/ UR
                  5       H  nT R                  U5        M     U" UR                  5        UR                  b  T R                  UR                  5        U" UR                  5        g )Nc                   >  TR                   (       a   eSTl         U  H  nUc  M  TR                  U5        M     STl         g ! STl         f = f)NTF)r@   r   )defaultsexprrA   s     rD   visit_defaults:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sN    8::::26/$D'

4( % 38/%/s    A A 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsr   kw_defaultskwargr   )rA   rT   r   r   s   `   rD   visit_arguments"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuvCJJsO w 	t''(::!JJtzz"t}}%rF   c                    U R                  U5      n[        U[        5      (       a  U =R                  [	        U5      -  sl        g U R                  R                  U5        g rH   )r   r/   r*   r   setadd)rA   rT   targets      rD   visitAssnTarget"DependenciesFinder.visitAssnTarget   sH     D!fd##F+  (rF   c                    [        UR                  5      S:w  a  [        S5      eU R                  UR                  S   5        U R	                  U5        g )Nr   z2Simultaneous multiple assignment is not supported.r   )r'   targets	TypeErrorr   r   rA   rT   s     rD   visit_AssignDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 rF   c                \    U R                  UR                  5        U R                  U5        g rH   r   r   r   r   s     rD   visit_AnnAssign"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 rF   c                \    U R                  UR                  5        U R                  U5        g rH   r   r   s     rD   	visit_ForDependenciesFinder.visit_For  r   rF   )	r9   r8   r   r4   r:   r>   r;   r?   r@   )returnNoner|   )r^   rN   __qualname____firstlineno____doc__r3   propertyrK   rV   rh   rr   r   r   r   r   r   r   r   r   r   __static_attributes____classcell__rC   s   @rD   r"   r"   "   se    	)0V ' '05$%N06
	!
&@)!!! !rF   r"   c                Z   SS K Js  Jn  [        U [        5      (       a  U R                  5       n U R                  S5      (       a<  U R                  S5      n [        U 5      n U R                  S5      (       d   eSU SS  -   $ U R                  S5      (       a  S[        U S S 5      -   $ U R                  S5      (       a  S[        U SS  5      -   $ U R                  S5      (       a  [        U R                  S5      5      $ O[        XR                  5      (       a  S[        U R                  5       3$ [        XR                  5      (       a  U R                  n O-[        U [        5      (       a  U R                  n O[	        U 5      n [         R"                  " U R%                  S	S
5      U 5      $ )Nr   zconst const**kr   ztl._trO   )triton.language.corelanguagecorer/   ra   striprS   removeprefix_normalize_tyendswithpointer_type
element_tydtyper4   rp   r^   r   r}   replace)tyr   s     rD   r   r     s[   ''"cXXZ=="")Br"B==%%%%"QR&= ;;sr#2w///==r!"v...== !788  	B))	*	*=/011	B

	#	#WW	B		[[W%))"**T2*>CCrF   c                      \ rS rSrSr  SS jr\S 5       r\SS j5       r\SS j5       r	\S 5       r
\S 5       r\S	 5       r\S
 5       rSrg)KernelParami+  zBRepresents a parameter (name plus metadata) to a @jit'ed function.c                4    Xl         X l        X0l        X@l        g rH   )num_paramdo_not_specializedo_not_specialize_on_alignment)rA   r   paramr   r   s        rD   r3   KernelParam.__init__.  s    !2.L+rF   c                .    U R                   R                  $ rH   )r   r4   rJ   s    rD   r4   KernelParam.name5  s    {{rF   c                    U R                   R                  (       a2  U R                   R                  [        R                  R                  :X  a  g[        U R                   R                  5      $ )NrO   )r   
annotationrP   	Parameteremptyr   rJ   s    rD   r   KernelParam.annotation9  sD    {{%%)?)?7CTCTCZCZ)ZT[[3344rF   c                    U R                   nUR                  S5      (       a  USS  nOUR                  S5      (       a  USS  nU[        [        R                  " 5       5      ;   a  U R                   $ g)Nr   r   r   r   rO   )r   rS   r   r   values)rA   as     rD   annotation_typeKernelParam.annotation_type?  sc    OO<<!"A\\#!"A.55788??"rF   c                     SU R                   ;   $ Nrl   )r   rJ   s    rD   is_constexprKernelParam.is_constexprJ  s    doo--rF   c                    U R                   (       a  gSU R                  ;   =(       d    U R                  R                  S5      $ )NFr   r   )r   r   rS   rJ   s    rD   is_constKernelParam.is_constN  s1    $//)MT__-G-G-MMrF   c                .    U R                   R                  $ rH   )r   defaultrJ   s    rD   r   KernelParam.defaultT  s    {{"""rF   c                d    U R                   R                  [        R                  R                  :g  $ rH   )r   r   rP   r   r   rJ   s    rD   has_defaultKernelParam.has_defaultX  s#    {{""g&7&7&=&===rF   )r   r   r   r   N)r   r&   r   zinspect.Parameterr   boolr   r   r   ra   )r^   rN   r   r   r   r3   r   r4   r   r   r   r   r   r   r   r    rF   rD   r   r   +  s    LM15M     5 5
   . . N N
 # # > >rF   r   c                2    SnSn[        [        XX5      S   $ )NFTr   )r   r   )r   
specializer   aligns       rD   mangle_typer   ]  s!    HE!+sjPQRSSrF   c                  6    \ rS rSr% S\S'   S rS rS	S jrSrg)
KernelInterfaceic  r    runc               \    U R                   " [        [        R                  U5      USS.UD6$ )NTgridwarmup)r  map
MockTensor
wrap_dtype)rA   r  r   kwargss       rD   r  KernelInterface.warmupf  s(    xxZ5J5JD1QT$\U[\\rF   c                   [        S5      e)Nzrun not implemented)NotImplementedError)rA   r  r  r   r
  s        rD   r  KernelInterface.runi  s    !"788rF   c                   ^ ^ UU 4S j$ )z
A JIT function is launched with: fn[grid](*args, **kwargs).
Hence JITFunction.__getitem__ returns a callable proxy that
memorizes the grid.
c                 .   > TR                   " U TSS.UD6$ )NFr  )r  )r   r
  r  rA   s     rD   <lambda>-KernelInterface.__getitem__.<locals>.<lambda>r  s    txx$T%'YRX'YrF   r   )rA   r  s   ``rD   __getitem__KernelInterface.__getitem__l  s     ZYrF   r   N)r   r    )	r^   rN   r   r   __annotations__r  r  r  r   r   rF   rD   r  r  c  s    	
F]9ZrF   r  c           
        UR                  5        VVs0 s H  u  pWXWR                  R                  S:X  a  [        U5      OjUR                  R                  S:X  a  SUR                  0OBUR                  R                  S:X  a'  SUR
                   SUR                  R                   30OU_M     nnnSS KnXUR                  5        V	s/ s H  n	[        U	5      PM     sn	[        UR                  5       5      UR                  5        V	s/ s H  n	[        U	5      PM     sn	[        UR                  5       5      UR                  WUR                  S.	n
UR                  U
5      nU$ s  snnf s  sn	f s  sn	f )Nr   rl   JITFunctionjit_function:r   )	r4   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionskeyr   )itemsrC   r^   ra   r   rU   fnr   jsonr\   r*   r   __dict__dumps)r4   r  	constantsattrsr  r   r   r   r#  xobjserialized_objs               rD   serialize_specialization_datar+  v  sR   
 $//+	 ,JC 	??33w>SZ??##{2 FQRWR]R]D^??##}4 :HELL>YZ[`[c[c[p[pZqIr8s:?	@ ,	   QZQ_Q_Qa?bQaAQQa?bY %**,0O,Qa,0O_cdidpdpdr_s##C6??C
 ZZ_N @c0Os   BE'
E-E2c                `   [        U R                  5      [        U5      :X  d   e/ n[        U R                  R                  5       U5       GH0  u  pEUR                  (       a  UR                  SU S35        M.  UR                  (       a  SOSnUR                  (       a  SOSnUR                  (       a  SOSnSU SU SU SU S3	n	UR                  (       a  [        UR                  [        5      (       a%  UR                  S:X  d  UR                  SS	 S
;   a  SnU(       a$  UR                  SUR                   SU	 S35        M  UR                  SUR                   S35        GM  UR                  U	 5        GM3     S n
SSR                  [        [        XR                  R                  5       5      5      S/-   5       SSR                  U R                  R                  5        Vs/ s H  nSU SU 3PM     sn5       SSR                  U5       S3nU R                  R                  5        VVs0 s H>  u  pLUR                   ["        R$                  R&                  Ld  M.  SU 3UR                   _M@     nnn[(        nXS'   X-S'   [*        US'   [-        X5        US   $ s  snf s  snnf )a  
Equivalent to sig.bind followed by apply_defaults. This generates a
native Python function (using exec) which can be memoized on a per-kernel
basis to avoid having to run these expensive functions -- which constitute
much of the kernel launch overhead -- every time we run the kernel.
z("constexpr", )TrueFalsezspecialize_impl(backend, , u1Nr   )fpbfFz("z",) + z[1:]z", None)c                z    U S   R                   [        R                  R                  L a  U S   $ U S    SU S    3$ )Nr   r   z	=default_)r   rP   r   r   )r(  s    rD   r  0create_function_from_signature.<locals>.<lambda>  sC    AaDLLG,=,=,C,CCAaDaAaD6QZ[\]^[_Z`IaarF   z
def dynamic_func(z	**optionsz):
    params = {'z': z}
    specialization = [,z-]
    return params, specialization, options
default_specialize_implbackendr[   dynamic_func)r'   
parameterszipr\   r   appendr   r   r   r   r/   ra   joinr*   r  r!  r   rP   r   r   r   r[   exec)sigkparamsr:  specializationr4   kpr   r   r   rK   r   	func_bodyr   func_namespacer9  s                  rD   create_function_from_signaturerG    s    s~~#g,...N++-w7??!!N4&":;!#v'H$&$8$8fJ!@@GfE-dV2hZr*RPUwVWXC!!b00#66))T1R5G5G5K|5[%*
"))Br/A/A.B&T*RS #))Br/A/A.B(*KL%%/' 8, bC))DS..*>*>*@!ABk]RST U		3>>;N;N;PQ;P4QtfCv.;PQRS Txx/0 1I >>//11KD== 1 1 7 77 	)(4&5==(1   -O(7$% '9$/N=! 	# .))+ Rs   +J%6-J*'J*c                8    U R                    SU R                   3$ )N.)rN   r   r"  s    rD   get_full_namerK    s    mm_Aboo.//rF   c                  z    \ rS rSrS rS r\SS j5       rS rS r	\S 5       r
SS jrS	 rS
 rS r\" \\S9rSrg)r[   i  c                   Xl         [        R                  " U5      U l         [        R                  " U5      u  U l        U l        [        U5      U l	        [        R                  " 5       U l        [        R                  " SR                  U R                  5      5      nU[         R"                  " SU[         R$                  5      R'                  5       S  nX0l        S U l        0 U l        UR.                  U l        UR0                  U l        UR2                  U l        UR4                  U l        UR6                  U l        g ! [         a  n[        S5      UeS nAff = f)Nz1@jit functions should be defined in a Python filerO   z^def\s+\w+\s*\()r"  rP   r  getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorrK  _fn_name	threadingRLock
_hash_locktextwrapdedentr?  research	MULTILINEstart_srchashr?   r   r^   r   __globals__rN   )rA   r"  erB   s       rD   r3   JITCallable.__init__  s    **2.	Y6=6L6LR6P3DL$3 &b)#//+ oobggdll34")).R\\BHHJKL		 TV zzOO>>--7  	YPQWXX	Ys   #E   
E
EEc                   U R                   nUR                  c  U R                  $ [        UR                  R
                  UR                  5       VVs0 s H  u  p#X#R                  _M     nnnU R                  U-  $ s  snnf rH   )r"  __closure__r_  r=  __code__co_freevarscell_contents)rA   r"  r4   cellr:   s        rD   get_capture_scopeJITCallable.get_capture_scope  sq    WW>>!###@CBKKD[D[]_]k]k@lm@l*$T---@l	m)++ ns   A=c                   U R                      U R                  b  U R                  sS S S 5        $ SU R                   3U l        [        R                  " U R
                  5      R                  n[        U R                  U R                  UU R                  S9nUR                  U R                  5       5        UR                  [        U R                  5      -   U l        [        [!        UR"                  R%                  5       5      5      U l        SSKJn  U =R                  [        U R"                  R%                  5        VVVs/ s H  u  u  pEu  pe[+        Xc5      (       d  M  XF4PM!     snnn5      -  sl        [,        R.                  " U R                  R1                  S5      5      R3                  5       U l        S S S 5        U R                  $ s  snnnf ! , (       d  f       U R                  $ = f)Nz
recursion:)r4   r9   r:   rB   r   rk   r%   )rV  r^  rS  rP   getclosurevarsr"  r:   r"   r_  rB   r   parserK   ra   rP  dictsortedr?   r!  r   rl   r/   r5   r6   r7   rI   )rA   r:   dependencies_finderrl   r4   rd   rv   s          rD   r`   JITCallable.cache_key  st    __yy$yy _
 %T]]O4DI..tww7AAI"4$--QUQaQamv9=#C%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!6II9=9N9N9T9T9V=9V"5)4Xc!+C!; *tk9V= > >I  tyy'7'7'@AKKMDI# $ yy	= _$ yys*   GDGG#G*AGG
G0c                ,    [        U R                  5      $ rH   )r^  r`   rJ   s    rD   __hash__JITCallable.__hash__
  s    DNN##rF   c                   [         R                  " U R                  5      n[        U[         R                  5      (       d   e[        UR                  5      S:X  d   e[        UR                  S   [         R                  5      (       d   eU$ )Nr   r   )r   rl  r]  r/   Moduler'   bodyFunctionDef)rA   trees     rD   rl  JITCallable.parse  se    yy#$

++++499~"""$))A,8888rF   c                    SSK Jn  U" U 5      $ )Nr   )constexpr_type)r   r{  )rA   r{  s     rD   rp   JITCallable.type  s    7d##rF   c                    g rH   r   )rA   handless     rD   _flatten_irJITCallable._flatten_ir  s    rF   c                    SU l         Xl        g)a  
The only method allowed to modify src.
Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
N)r^  r]  )rA   new_srcs     rD   _unsafe_update_srcJITCallable._unsafe_update_src  s     		rF   c                    [        S5      e)NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrJ   s    rD   _set_srcJITCallable._set_src)  s     ( ) 	)rF   c                    U R                   $ rH   )r]  rJ   s    rD   _get_srcJITCallable._get_src.  s    yyrF   )fgetfset)r   r_  rN   r^   r   rS  rV  r]  r"  r^  rO  r  rP  r?   Nr   )r~  zlist[ir.value]r   r   )r^   rN   r   r   r3   rh  r   r`   rr  rl  rp   r  r  r  r  rB   r   r   rF   rD   r[   r[     sa     (D,  ,$ $ $)
 x
0CrF   r[   c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg	)
JitFunctionInfoi7  r   rU   ra   r4   r  r  r   N)r^   rN   r   r   r  r   r   rF   rD   r  r  7  s    
IrF   r  c                   ^ [        U5      [        U5      4nU R                  US 5      nUb  U$ U4S jm[        T" U5      5      [        U5      -   nX@U'   U$ )Nc                |  > [        U [        5      (       a  U  Vs/ s H  nT" U5      PM     sn$ [        U 5      (       a'  U  Vs/ s H  nT" U5      PM     nnU R                  " U6 $ [        U [        5      (       a  [	        U4S jU  5       5      $ [        U [
        5      (       a  U R                  $ U $ s  snf s  snf )Nc              3  4   >#    U  H  nT" U5      v   M     g 7frH   r   ).0r   replace_callabless     rD   	<genexpr>?compute_cache_key.<locals>.replace_callables.<locals>.<genexpr>L  s     ?3C*3//3s   )r/   r*   r   rC   tupler[   r`   )r)  r   resultsr  s      rD   r  ,compute_cache_key.<locals>.replace_callablesE  s    c4  69:cs%c*c::39<=#(-G==='**U##?3???[))== 
 ;=s   B4B9)r  ra   r}   )kernel_key_cacherC  r  r   r`   r  s        @rD   compute_cache_keyr  >  sb     #g,
/C $$S$/I
 %n56WEI%SrF   c                    [        U [        5      (       d  U $ [        U 5       H  u  p[        U5      X'   M     [	        U 5      $ rH   )r/   r*   	enumerateconvert_to_tuple_if_listr  )iteminested_values      rD   r  r  V  s>    dD!! %T?*<8 + ;rF   c                  z   ^  \ rS rSrS r  SS jrS rS rS rS r	S r
  SU 4S	 jjrS
 rS rS rS rSrU =r$ )r  ib  c                    g)NFr   rJ   s    rD   is_gluonJITFunction.is_gluond  s    rF   c
                   U(       d  g U R                   R                  n
U R                   R                  nSR                  [	        U R
                  US   5       VVs/ s H  u  pUR                   SU 3PM     snn5      nU
 SUR                   SUR                   SUR                   SUR                   SUR                   S	U S
3n[        U R                   5      n[        UX6US   XrU5      nUUUUR                  UR                  UR                  UR                  UR                  UR                  UUU	S.nU" UU[        XU 5      SU0UEU	SS9$ s  snnf )Nr0  r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r-  r   )r  devicer&  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr"  compileis_manual_warmupalready_compiled)r"  r   rN   r?  r=  paramsr4   r  r  r  r  r  rK  r+  r  r  )rA   hookr   r  r   r  r&  r  r  r  r4   rU   r   r   	arg_reprsr  	full_namer  r
  s                      rD   
_call_hookJITFunction._call_hookg  s    ww####IIc$++WZ[\W]F^_F^%**Rt4F^_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  a{  |C  |[  |[  {\  \^  _h  ^i  ij  k!$''*	;Iy]def]gip<BD #" **((!,, ' 8 8'.'F'F"..#6"
 vT2C*6*&"
 	
- `s   E
c                ^    [        U5      (       d   eU R                  R                  U5        g)zu
Add a hook that will be executed prior to the execution of run
function with args and kwargs passed into the kernel
N)rs   pre_run_hooksr>  )rA   r  s     rD   add_pre_run_hookJITFunction.add_pre_run_hook  s&    
 ~~~!!$'rF   c                    SSK JnJnJnJn  [
        R                  R                  5       nU" U5      nXl        X l        X0l        [        U R                  U R                  U5      n0 0 XVU4$ )z!
Precompute as much as possible.
r   )CompiledKernelr  	ASTSourcemake_backend)compilerr  r  r  r  r   activeget_current_targetrG  r  r  )rA   r  r  r  r  r   r:  binders           rD   create_binderJITFunction.create_binder  s\     	PO113v&,"/WU2v..rF   c           
        UR                  U5      nU R                   Vs/ s H  ofR                  PM     nnU Vs/ s H  ofS   PM	     nn[        Xx5       V	V
s0 s H  u  pX_M	     nn	n
SU;  d   S5       eSU;  d   S5       eSU;  d   S5       eU H'  n	XR                  ;  d  M  X;  d  M  [        SU	-  5      e   [        US	 5      nU Vs0 s H&  o[        [        UR                  5       5      U5      _M(     nnU Vs/ s H  ofS   S
:X  a  SOUS   PM     nn[        US 5      nU V	s0 s H  oUR                  [        X5      5      _M     nn	X[X4$ s  snf s  snf s  sn
n	f s  snf s  snf s  sn	f )Nr   device_typez=device_type option is deprecated; current target will be usedr  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    US:H  $ r   r   )rd   rv   s     rD   r  (JITFunction._pack_args.<locals>.<lambda>  s	    3+;MrF   rl   rO   r   c                "    [        U[        5      $ rH   )r/   ra   )rd   r(  s     rD   r  r    s    Z35GrF   )parse_optionsr  r4   r=  r$  KeyErrorr   r   r*   r   
parse_attr)rA   r:  r
  
bound_argsrC  r  r(  sigkeyssigvalsrb   vr  
constexprspathattrvalsr'  s                   rD   
_pack_argsJITFunction._pack_args  s   ''/#';;/;a66;/!/0AQ40(+G(=>(=fqQT(=	>F*k,kk*v%a'aa%v%a'aa%A(((Q-=SVWWXX  #7,MN
[ef[eSW-d:3D3D3F.GNN[e
fAOPAA$+-B1Q47Ph(GHPUVPU1G&&'8'EFFPUV:44% 00> gPVs"   EEE"-E(E-.$E2c               n   UR                  SU R                  5      =(       d    [        R                  R                  US'   [        R                  R
                  US'   [        R                  R                  5       n[        R                  R                  U5      nU R                   H  nU" U0 UD6  M     U R                  U   u  ppnU" U0 UD6u  pn[        R                  R                  b6  [        R                  R                  5       u  nnUR                  SU S35        [        XU5      nUR                  US 5      nUc3  U R                  XXU5      u  nnnnU R!                  UUUUUUU5      nUc  g [#        5       nU R$                  R'                  5        H8  u  u  nnu  nnUR                  UU5      =nU:w  d  M&  [)        SU SU SU 35      e   U(       d  Uc   e[+        U5      (       a  U" U5      n[-        U5      nUS   nUS	:  a  US	   OS	nUS
:  a  US
   OS	n UR.                  " X/UR1                  5       Q76 n!UR2                  " UUU UUR4                  UR6                  U![        R                  R8                  [        R                  R:                  /	UR1                  5       Q76   U$ )Ndebuginstrumentation_modez("custom_pipeline", r-  rY   z1 has changed since we compiled this kernel, from z to r   r   r   )r}   r  r   runtimecompilationr  r   r  get_current_deviceget_current_streamr  device_cachesadd_stages_inspection_hookr>  r  r  _do_compileobjectr?   r!  r]   rs   r'   launch_metadatar   r  functionpacked_metadatalaunch_enter_hooklaunch_exit_hook)"rA   r  r  r   r
  r  r  r  kernel_cacher  r   r:  r  r  rC  r  inspect_stages_keyinspect_stages_hashr   kernelr  r  r'  not_presentr4   rd   rv   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s"                                     rD   r  JITFunction.run  s    **Wdjj9PU]]=P=Pw).):):)O)O%& 11311&9 &&D$!&! ' CGBTBTU[B\? /5d.Ef.E+
G ==33?6;mm6^6^6`3 3!!$89L8MQ"OP 0'J!!#t, >48OOGU_DK5M1GY
E %%c9fj'SXZ`aF~ h.2.C.C.I.I.K*IT1*\&**4==#E"&tf,]^a]bbfgmfnoq q /L
 ###~~J'D	I!WF )AT!W1F )AT!W1F$44TXJDUDUDWXOJJvvvvvH^H^`o}}668V8VnYcYjYjYlnrF   c                V    U R                   c  U R                  $ U R                  U5      $ rH   )_reprrS  )rA   rd   s     rD   r  JITFunction.repr  s"     $

 2t}}E

1ErF   c	           	       > U(       a  UO/ nU(       a  UO/ n[         TU ]  U5        UR                  U l        X l        X0l        X@l        Xpl        Xl        U [        U R                   SU R                  R                   3'   / U l        [        U R                  R                  R!                  5       5       H^  u  pX;   =(       d    U
R"                  U;   nX;   =(       d    U
R"                  U;   nU R                  R%                  ['        XX5      5        M`     [)        U R*                  5      U l        S U l        XPl        X`l        U R                   Vs/ s H  oR"                  PM     snU l        U R                   Vs/ s H!  oR6                  (       d  M  UR8                  PM#     snU l        / U l        g s  snf s  snf )Nr  )r2   r3   rN   rU   versionr   r   r  r  _triton_jit_function_registryr"  r   r  r  r  r<  r   r4   r>  r   r   r  r  r  r  rZ   	arg_namesr   r   r  r  )rA   r"  r  r   r   r  rZ   r  r  r  r   dnsdns_oaprC   s                 rD   r3   JITFunction.__init__   sb   1B-Ki)Goq&mm!2.L+
.QU%Qtww7K7K6L&MN!$..";";"B"B"DEHA(KEJJ:K,KC8hEJJJh<hFKK{1SAB F )););< 
  +/++6+Q&&+6*.++H+Q5155+H  	 7Hs   F6>F;F;c           
       ^ SS K nSS KJm  [        R                  R                  5       nUR                  U5      nUS   U R                  :w  a  [        SUS    SU R                   35      e[        [        US   5      nUS   nU R                  U   u    pxpUS   n
UR                  U
:w  a  [        SU
 SU 35      eU4S jn[        XV5       VVs0 s H  u  pX" U5      _M     nnn[        [        US	   5      nUS
   n[        [        UU5      5      nUS   R                  5        VVs0 s H  u  pU[!        U5      _M     nnnUS   R                  5        VVs0 s H(  u  pU[#        U[$        5      (       a  [        U5      OU_M*     nnnUS   nU	R'                  U5      nU R)                  UUUUUUSS9$ s  snnf s  snnf s  snnf )Nr   r4   zSpecialization data is for z but trying to preload for r  r  r   c                "  > TR                   R                  U 5      (       a  TR                  U 5      $ [        U [        5      (       aG  SU ;   a  TR	                  U S   5      $ SU ;   a'  U S   nU[
        ;   a	  [
        U   $ [        SU S35      eU $ )Nrl   r  zUnable to resolve JITFunction z for preload)r   is_dtyper/   rm  rl   r  r]   )r   jf_keytls     rD   _decode_constant-JITFunction.preload.<locals>._decode_constant7  s    xx  ''xx&%&&%'<<k(:;;!U*">2F!>><VDD&)Gx|'\]]LrF   r  r  r  r  r   T)r  )r#  triton.languager   r   r  r  loadsrS  r]   r  r  r  r$  r=  rm  r!  r  r/   r*   r  r  )rA   r  r#  r  deserialized_objr  r  rd   r   r:  deserialized_targetr  r   r   r  r  r  r'  r  r  r  s                       @rD   preloadJITFunction.preload&  s   $113::&9:F#t}}4-.>v.F-GGbcgcpcpbqrt tE#3O#DE(9#'#5#5f#= 1g.x8 ??11!<=P<QQlmsltuvv	 FIEfgEfzsc+E22Ef
g 0 >?
%l3
SZ01 M]]hLiLoLoLqrLqjcS2599Lq	r /y9??A
A
 E4!8!8ueCA 	 
 u%''0   
 	
 h s
s   F9+F?/Gc                X  ^ ^^^^^^^^^^^ T R                   T   u  mnmpT R                  [        R                  R                  TTTTTTT/T5	      (       a  g T R                  T TTT5      m[        R                  R                  5       n
U
bJ  [        5       m[        TU	TT5      nUUU UU4S jnUUUUUUU UUU4
S jnU
R                  XU5      nUTT'   U$ T R                  TTTR                  S9nUTT'   T R                  [        R                  R                  TTTTTTT/T5	        U$ )Nc                 <   > TR                  TTTR                  T S9$ )N)r   r  	_env_vars)r  r$  )env_varsr  rA   rB   r   s   rD   async_compile.JITFunction._do_compile.<locals>.async_compilei  s!    ||C@P@P\d|eerF   c                t   >
 U TT'   TR                  [        R                  R                  TTT	TTTT/T
5	        g rH   )r  r   r  jit_post_compile_hook)r  r'  r  r  r  r   r  rA   r  r   r  s    rD   finalize_compile1JITFunction._do_compile.<locals>.finalize_compilel  s;    $*S! C CS)U[]ceo '%&:rF   )r   r  )r  r  r   r  jit_cache_hookr  r   active_moder}   r   r   submitr  r$  r  )rA   r   r  r  r  r  r'  r  rd   r:  
async_moder`   r  r  r  r  r  rB   r   s   ````````       @@@@rD   r  JITFunction._do_compile[  s1   .2.@.@.H+a??5==77iQWYcelotnu!# #nnT9j%@#//335
!68H%c7GXFIf f: :
  &&yAQRF &L 	 \\#fg>N>N\OF &LOOEMM??iQWY_akmt"GV-rF   c                    [        S5      e)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)r]   rA   r   r
  s      rD   __call__JITFunction.__call__z  s    WXXrF   c                P    SU R                    SU R                  R                   S3$ )NzJITFunction(r  r-  )rU   r"  r   rJ   s    rD   __repr__JITFunction.__repr__}  s&    dkk]!DGG,@,@+ACCrF   )r  r  r  r  r  r  r  r  r   r   r  r  rU   rZ   r  r  r  )r   zbool | None)NNNNNNN)r^   rN   r   r   r  r  r  r  r  r  r  r3   r  r  r!  r$  r   r   r   s   @rD   r  r  b  s`    .
 
.
`(/507rF mq;?$ L3
j>YD DrF   r  c                    g rH   r   rJ  s    rD   jitr'    s    rF   r  r  r  r   r   r  rZ   c                    g rH   r   r(  s          rD   r'  r'    s     rF   c               F   ^^^^^^^ SUUUUUUU4S jjnU b  U" U 5      $ U$ )a  
Decorator for JIT-compiling a function using the Triton compiler.

:note: When a jit'd function is called, arguments are
    implicitly converted to pointers if they have a :code:`.data_ptr()` method
    and a `.dtype` attribute.

:note: This function will be compiled and run on the GPU. It will only have access to:

       * python primitives,
       * builtins within the triton package,
       * arguments to this function,
       * other jit'd functions

:param fn: the function to be jit-compiled
:type fn: Callable
c                   > [        U 5      (       d   e[        R                  R                  (       a  SSKJn  U" U TTTTTTTS9$ [        U TTTTTTTS9$ )Nr   )InterpretedFunction)r  r   r   r  rZ   r  r  )rs   r   r  	interpretinterpreterr,  r  )	r"  r,  r  r   r   r  rZ   r  r  s	     rD   	decoratorjit.<locals>.decorator  sl    |||==""8&r7N_Fdlq08tUdf f "3/M! /	 	rF   r"  r    r   zJITFunction[T]r   )	r"  r  r  r  r   r   r  rZ   r/  s	    ``````` rD   r'  r'    s&    : & 
~} rF   c                  X    \ rS rSrSr\S 5       rS
S jrS r\S 5       r	\S 5       r
S	rg)r  i  zf
Can be used in place of real tensors when calling:
    kernel.warmup(MockTensor(torch.float32), ...)
c                p    U R                   R                  S:X  a  U R                  S:X  a  [        U 5      $ U $ )Nr   torch)rC   r^   rN   r  )r   s    rD   r	  MockTensor.wrap_dtype  s.    ==!!W,71Jc?"
rF   Nc                (    Uc  S/nXl         X l        g )Nr   r   shape)rA   r   r8  s      rD   r3   MockTensor.__init__  s    =CE

rF   c                    S/nU R                   SS   H  nUR                  US   U-  5        M     [        [        U5      5      $ )Nr   r   )r8  r>  r  reversed)rA   stridessizes      rD   strideMockTensor.stride  sB    #JJqrNDNN72;-. #Xg&''rF   c                     gNr   r   r   rF   rD   data_ptrMockTensor.data_ptr      rF   c                     grA  r   r   rF   rD   	ptr_rangeMockTensor.ptr_range  rD  rF   r7  rH   )r^   rN   r   r   r   staticmethodr	  r3   r>  rB  rF  r   r   rF   rD   r  r    sM    
  
(    rF   r  c                  T    \ rS rSrS rS rS rSS jrS rS r	S r
S	 rS
 rS rSrg)TensorWrapperi  c                    X l         Xl        UR                  U l        UR                  U l        U R                  R                  U l        g rH   )r   basedatar  r8  )rA   rL  r   s      rD   r3   TensorWrapper.__init__  s1    
	II	kkYY__
rF   c                6    U R                   R                  5       $ rH   )rL  rB  rJ   s    rD   rB  TensorWrapper.data_ptr  s    yy!!##rF   c                4    U R                   R                  " U6 $ rH   )rL  r>  )rA   r   s     rD   r>  TensorWrapper.stride  s    yy&&rF   c                <    SU R                    SU R                   S3$ )NzTensorWrapper[r  r-  )r   rL  rJ   s    rD   __str__TensorWrapper.__str__  s    

|2dii[::rF   c                6    U R                   R                  5       $ rH   )rL  element_sizerJ   s    rD   rW  TensorWrapper.element_size  s    yy%%''rF   c                ^    [        U R                  R                  5       U R                  5      $ rH   )rJ  rL  cpur   rJ   s    rD   rZ  TensorWrapper.cpu  s    TYY]]_djj99rF   c                N    U R                   R                  UR                   5        g rH   )rL  copy_)rA   others     rD   r]  TensorWrapper.copy_  s    		

#rF   c                ^    [        U R                  R                  5       U R                  5      $ rH   )rJ  rL  cloner   rJ   s    rD   ra  TensorWrapper.clone  s    TYY__.

;;rF   c                `    [        U R                  R                  U5      U R                  5      $ rH   )rJ  rL  tor   )rA   r  s     rD   rd  TensorWrapper.to  s     TYY\\&14::>>rF   c                `    [        U R                  R                  U5      U R                  5      $ rH   )rJ  rL  	new_emptyr   )rA   sizess     rD   rg  TensorWrapper.new_empty  s"    TYY007DDrF   )rL  rM  r  r   r8  Nr   )r^   rN   r   r   r3   rB  r>  rT  rW  rZ  r]  ra  rd  rg  r   r   rF   rD   rJ  rJ    s5    %$';(:$<?ErF   rJ  c                
   [        U [        5      (       a;  XR                  R                  :X  a  U R                  $ [        U R                  U5      $ [	        U S5      (       a  [        X5      $ [        S[        U 5       S35      e)NrB  zCannot reinterpret a rI  )r/   rJ  rL  r   hasattrr   rp   )tensorr   s     rD   reinterpretrm    sm    &-((KK%%%;; !e44		$	$V++/V~Q?@@rF   c                h   U n[        U[        5      (       d#  UR                  n[        U[        5      (       d  M#  UR                  R                  R                  nUR
                  n[        UR                  5       H1  u  pEUR                  5       R                  S5      (       d  M+  X4-  n  X#4$    X#4$ )Nzdef )
r/   r[   r"  rd  co_filenamerP  r  rO  r   rS   )r"  base_fn	file_name
begin_lineidxlines         rD   get_jit_fn_file_lineru  *  s    G+..** +..

##//I--J w/	::<""6**J  	 0   rF   c                  0    \ rS rSrS r\S 5       rS rSrg)BoundConstexprFunctioni<  c                    Xl         X l        g rH   )__self____func__)rA   instancer"  s      rD   r3   BoundConstexprFunction.__init__>  s     rF   c                .    U R                   R                  $ rH   )rz  r`   rJ   s    rD   r`    BoundConstexprFunction.cache_keyB  s    }}&&&rF   c                B    U R                   " U R                  /UQ70 UD6$ rH   rz  ry  r   s      rD   r!  BoundConstexprFunction.__call__F  s    }}T]]<T<V<<rF   r  N)	r^   rN   r   r   r3   r   r`   r!  r   r   rF   rD   rw  rw  <  s      ' '=rF   rw  c                  <   ^  \ rS rSrU 4S jrS rSS.S jrSrU =r$ )ConstexprFunctioniJ  c                $   > [         TU ]  U5        g rH   )r2   r3   )rA   r"  rC   s     rD   r3   ConstexprFunction.__init__L  s    rF   c                "    Ub  [        X5      $ U $ rH   )rw  )rA   r)  objclasss      rD   __get__ConstexprFunction.__get__O  s    ?)#44rF   N)	_semanticc               *   SSK JnJn  U Vs/ s H
  od" U5      PM     nnUR                  5        VVs0 s H  u  pxXt" U5      _M     nnnU R                  " U0 UD6n	Uc  U	$ [
        R                  R                  (       a  U	$ U" U	5      $ s  snf s  snnf )Nr   )_unwrap_if_constexprrl   )r   r  rl   r!  r"  r   r  r-  )
rA   r  r   r
  r  rl   r(  rb   r  ress
             rD   r!  ConstexprFunction.__call__U  s    H156A$Q'6;A<<>J>!!)!,,>J ggt&v&J ==""J~ 7Js
   B
Br   )	r^   rN   r   r   r3   r  r!  r   r   r   s   @rD   r  r  J  s     )-  rF   r  c                    [        U 5      $ )z
Wraps an arbitrary Python function so that it can be called at
compile-time on constexpr arguments in a Triton function and
returns a constexpr result.
)r  rJ  s    rD   constexpr_functionr  h  s     R  rF   r   )Fr1  )r  Optional[Callable]r  r  r   Optional[Iterable[int | str]]r   r  r  Optional[bool]rZ   r  r   zCallable[[T], JITFunction[T]]rH   )r"  zOptional[T]r  r  r  r  r   r  r   r  r  r  rZ   r  r   zKernelInterface[T])H
__future__r   r   r   r0   r5   rP   r   rT  rY  rW  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   triton.backendsr   typesr   rO   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r   r   r=   r<   r    NodeVisitorr"   r   r   r   r  r+  rG  rK  r[   r  r  r  r  r  r'  r  rJ  rm  ru  rw  r  r  r   rF   rD   <module>r     s;   , 
      	  # ! % ] ] ] '     ` `   [ [!3CLg! g!^D4/> />dTZgaj Z&$9*x0i1 i1X !#    0	\D+q1 \DH	 
 
 
 #*.7;DH #
 
 (	

 5
 %B
 
 
 #
 

 4 #*.7;DH #44 	4
 (4 54 %B4 4 4 4x B"E "EJA!$=[ = <!rF   