
    3j                       % S SK Jr  S SKrS SKrS SKrS SKJrJrJrJ	r	J
r
Jr  S SKrS SKrS SKrS SKJr  S SKrS SKJr  S SKJr  S SKJr  S SKJr  SS	KJr  S S
KJr  SSKJ r!  SSKJ"r#  SSK$J%r%  \
" S5      r&\ " S S5      5       r' " S S5      r( " S S5      r)\" SS9 " S S5      5       r*S r+S r,S r-S r.S r/S r0\Rb                  " \/\Rd                  /S 9r3\Rb                  " \/\Rh                  /S 9r5\Rb                  " \0\Rl                  /S 9r7 " S! S"5      r8 " S# S$5      r9\:" 5       r;\9" 5       r<\" \<5      r=S%\>S&'    " S' S(5      r?S@S) jr@S@S* jrAS@S+ jrB " S, S-5      rC " S. S/\C5      rD " S0 S1\C5      rES@S2 jrFS@S3 jrGS4 rHS5 rIS6 rJS7 rK " S8 S95      rL " S: S;\R                  5      rN " S< S=5      rO " S> S?\\&   5      rPg)A    )annotationsN)TupleListDictCallableTypeVarOptional)	dataclass)TritonSemantic)KernelInterface)TensorDescriptor   )InterpreterError)partial   )interpreter)ir)_tuple_createTc                  v    \ rS rSr% SrS\S'   S\S'   \R                  " \S9r	S\S	'   S
 r
S rS rS rS rSrg)TensorHandle   z
data: numpy array
dtype: triton type, either pointer_type or scalar_type.
we don't store block_type here because the shape information is already available in the data field
attr: a dictionary of attributes
z
np.ndarraydataztl.dtypedtype)default_factoryr   attrc                    [        U R                  U R                  5      (       dI  [        SU R                  R                  S-   SU R                  R
                   SU R                   35      eg )Nznumpy data itemsize (   z) bits) exceeds dtype primitive_bitwidth (z bits) for triton type )_validate_np_data_sizer   r   
ValueErroritemsizeprimitive_bitwidthselfs    T/home/wildlama/miniconda3/lib/python3.13/site-packages/triton/runtime/interpreter.py__post_init__TensorHandle.__post_init__'   sm    %dii<<4TYY5G5G!5K4L M!!%!>!> ??VW[WaWaVbd e e =    c                H    [        U R                  R                  5       5      $ N)boolr   allr#   s    r%   __bool__TensorHandle.__bool__,   s    DIIMMO$$r(   c                ~    U R                   n[        US5      (       a  UR                  n[        US5      (       a  M  U$ )N
element_ty)r   hasattrr0   )r$   r   s     r%   get_element_tyTensorHandle.get_element_ty/   s7    

e\**$$E e\**r(   c                ^    [        U R                  R                  5       U R                  5      $ r*   )r   r   copyr   r#   s    r%   cloneTensorHandle.clone5   s    DIINN,djj99r(   c                     X R                   U'   g r*   )r   )r$   keyvalues      r%   set_attrTensorHandle.set_attr8   s    		#r(    N)__name__
__module____qualname____firstlineno____doc____annotations__dataclassesfielddictr   r&   r-   r2   r6   r;   __static_attributes__r=   r(   r%   r   r      sC     O""48D$8e
%:r(   r   c                       \ rS rSrS rS rSrg)BlockPointerHandle<   c                L    Xl         X l        X0l        X@l        XPl        X`l        g r*   )baseshapestridesoffsetsblock_shapeorder)r$   rL   rM   rN   rO   rP   rQ   s          r%   __init__BlockPointerHandle.__init__>   s!    	
&
r(   c                d   U R                   R                  5       nUR                  S-  n[        R                  " U R                   R
                  U R                  5      n[        R                  " U R                  [        S9n[        [        U R                  5      5       H  nS/[        U R                  5      -  nU R                  U   Xv'   U R                  U   R
                  [        R                  " U R                  U   5      -   R                  U5      nXCU-  U R                  U   R
                  -  R                  [        R                   5      -   nXa;   d  M  XXU R"                  U   R
                  :  -  US:  -  nM     [%        X@R                   R&                  R(                  5      n	X4$ )Nr   r   r   r   )rL   r2   r"   npbroadcast_tor   rP   onesr+   rangelenrO   arangereshaperN   astypeuint64rM   r   r   scalar)
r$   boundary_checkdtype_ttn_bytes	ptrs_datamasksdim
bcast_dimsoffptrs_handles
             r%   materialize_pointers'BlockPointerHandle.materialize_pointersF   sS   99++---2OODIINND4D4DE	((5T--./Cs4#3#344J"..s3JO<<$))BIId6F6Fs6K,LLUUV`aC!s]T\\#5F5K5K%K$S$STVT]T]$^^I$tzz#';';!;<qI 0 #9iioo.D.DE!!r(   )rL   rP   rO   rQ   rM   rN   N)r>   r?   r@   rA   rR   ri   rG   r=   r(   r%   rI   rI   <   s    "r(   rI   c                  2    \ rS rSr  SS jrS rSS jrSrg)	TensorDescHandleV   c                `    Xl         [        U5      U l        X l        X0l        X@l        XPl        g r*   )rL   rZ   ndimrM   rN   rP   padding)r$   rL   rM   rN   rP   rp   s         r%   rR   TensorDescHandle.__init__X   s'    	J	
&r(   c                r   U R                   R                  R                  5       S-  S:X  d   S5       e[        U R                  5      U R
                  :X  d   e[        U R                  5      U R
                  :X  d   eU R
                  S:  d   S5       eU R                   R                  R                  nUR                  S-  nU R                  S S  H0  nUR                  R                  5       U-  nUS-  S:X  a  M+   S5       e   U R                  S   R                  R                  5       S:X  d   S	5       eg )
N   r   zbase must be 16-byte alignedr   z"descriptor cannot be 0 dimensionalr   zstride must be 16-byte alignedzlast dim must be contiguous)
rL   r   itemrZ   rN   ro   rP   r   r0   r"   )r$   	scalar_tyr!   stridebyte_strides        r%   validateTensorDescHandle.validatea   s   yy~~""$r)Q.N0NN.4<< DII---4##$		111yyA~CCC~IIOO..	//14ll3B'F ++**,x7K#q(J*JJ( ( ||B$$))+q0O2OO0r(   c                   [        U5      U R                  :X  d   eU R                  R                  R                  nUR
                  S-  nUS   R                  U-  S-  S:X  d   S5       e[        R                  " U R                  R                  U R                  5      n[        R                  " U R                  [        S9n[        [        U R                  5      5       H  nS/[        U R                  5      -  nU R                  U   Xv'   X   R                  [        R                  " U R                  U   5      -   R                  U5      nXCU-  U R                  U   R                  -  R!                  [        R"                  5      -   nUSU:*  -  XR$                  U   R                  :  -  nM     UR                  [        R"                  :X  d   e['        X@R                  R                  R(                  5      n	X4$ )Nr   rt   rs   r   z*block offset start must be 16-byte alignedrU   r   )rZ   ro   rL   r   r0   r"   r   rV   rW   rP   rX   r+   rY   r[   r\   rN   r]   r^   rM   r   r_   )
r$   rO   rv   r!   rc   rd   re   rf   rg   rh   s
             r%   ri   %TensorDescHandle.materialize_pointersn   s   7|tyy(((IIOO..	//14  8+r1Q6d8dd6OODIINND4D4DE	((5T--./Cs4#3#344J"..s3JO<$$ryy1A1A#1F'GGPPQ[\C!^dll36G6L6L%L$T$TUWU^U^$__IQ#X&#

30D0D*DEE 0 "))+++"9iioo.D.DE!!r(   )rL   rP   ro   rp   rM   rN   N)rL   r   rM   List[TensorHandle]rN   r}   rP   	List[int])rO   r}   )r>   r?   r@   rA   rR   ry   ri   rG   r=   r(   r%   rl   rl   V   s    'P"r(   rl   T)frozenc                      \ rS rSr% SrS\S'   SrS\S'   SrS\S	'   SrS
\S'   Sr	S\S'   Sr
S\S'   SrS\S'   SrS\S'   SrS\S'   SrS\S'   Srg)InterpreterOptions   NzOptional[dict]extern_libsFr+   debugTsanitize_overflowzOptional[str]arch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15zTuple[str, ...]supported_fp8_dtypesr=   !deprecated_fp8_dot_operand_dtypestf32strdefault_dot_input_precision)r   tf32x3ieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r>   r?   r@   rA   r   rC   r   r   r   r   r   r   r   r   r   rG   r=   r(   r%   r   r      sl    "&K&E4"t"D-,c/c9;%;'--4N /N)*!3*%L#%r(   r   c                    [        U[        R                  5      (       a  gU R                  S-  nUR                  nUS:  a  SnX#:  a  gg)NTr   F)
isinstancetlpointer_typer!   r"   )np_arraytl_dtypenp_dtype_bitwidthtl_dtype_bitwidths       r%   r   r      sO    (BOO,, ))A- 33 1,r(   c                &   U [         R                  :X  a  [         R                  $ U [         R                  :X  a  [         R                  $ U [         R
                  :X  a  [         R                  $ U [         R                  :X  a  [         R                  $ U $ r*   )	rV   uint8int8uint16int16uint32int32r^   int64rU   s    r%   _get_signed_np_dtyper      s[    ww		xx		xx		xxLr(   c                   [        U [        R                  5      (       a$  [        R                  " [        R
                  5      $ 0 [        R                  [        R                  " [        5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                  [        R                  " [        R                  5      _[        R                   [        R                  " [        R                   5      _[        R"                  [        R                  " [        R"                  5      _[        R
                  [        R                  " [        R
                  5      _[        R$                  [        R                  " [        R                  5      _[        R&                  [        R                  " [        R                  5      _[        R(                  [        R                  " [        R                  5      _[        R*                  [        R                  " [        R                  5      _[        R,                  [        R                  " [        R                  5      _[        R.                  [        R                  " [        R                  5      0En[        U [        R0                  5      (       a[  [        U R2                  [        R                  5      (       a$  [        R                  " [        R
                  5      $ XR2                     $ X   $ r*   )r   r   r   rV   r   r^   int1r+   float16float32float64r   r   r   r   r   r   r   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer0   )tt_dtypenp_typess     r%   _get_np_dtyper      s   (BOO,,xx		""
$


BHHRZZ( 	

BHHRZZ( 	

BHHRZZ(	
 	"''" 	"((288$ 	"((288$ 			288BII& 	"((288$ 			288BII& 	"((288$ 			288BII& 	RXXbii(  	RXXbhh'!" 	*#$ 	rxx)%& 	rxx)'( 	*)H, (BMM**h))2??;;88BII&&++,,r(   c                   [        [        SUR                   35      n[        [        SUR                   35      n[        R                  " U R	                  5       US9nXaR                  S-
  -	  S-  nUR                  UR
                  -
  S-
  nUR                  UR
                  -
  S-
  n	USUR
                  -  S-
  -  n
UR                  nUR                  nXaR
                  -	  SU-  S-
  -  R                  [        R                  5      nUS:H  n[        R                  " U5      (       a  [        R                  " U[        R                  S9n[        UR
                  5       H   nU
U-	  S-  nUR
                  U-
  UUS:H  '   M"     U
S:H  nSX   -
  X'   X-
  UUU-  '   X   X   -  SUR
                  -  S-
  -  X'   [        R                  " S[        R                  " X-
  U-   SU	-  S-
  5      5      nUR                  U5      nUR                  U5      nUR                  UR                  :  a  XR
                  UR
                  -
  -	  SUR
                  -  S-
  -  nU[        R                  R                   :X  a*  U
SUR
                  UR
                  -
  S-
  -  -  nUUS:  -   nUR                  U5      nO>U
R                  U5      UR
                  UR
                  -
  -  SUR
                  -  S-
  -  nUS:H  n[        R                  " U5      (       a  XaR
                  -	  SU-  S-
  -  R                  [        R                  5      nUS:g  nUU-  n[        R                  " U[        R                  S9nSU-
  X   U-
  -
  UU'   UU   UU   -	  SUR
                  UU   -
  -  -  UU'   UUR                  S-
  -  UUR
                  -  -  U-  nUR#                  U R$                  5      $ )NuintrU   r   r   )getattrrV   r"   
frombuffertobytesfp_mantissa_widthexponent_biasr]   r   any
zeros_likerY   maximumminimum_irROUNDING_MODERTNEr\   rM   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r%   _convert_floatr      s   rT+*H*H)I#JK tL,K,K+L%MNemmo5EFI881<=ED&99K<Y<YY\]](;;l>\>\\_``[%B%B BaGHK**J,,K;;;FZAZ^_@_`hhikiqiqrH!mO	vvo
 --	:{445A%*d2I&1&C&Ca&GGIN# 6 "-!1$%(@$@!=G=U'/9:(3(DH`(`+///14(6$ jjBJJ0E0SWX\qWquvVv$wxO%,,-?@O++01K%%(G(GG).K.KlNlNl.lm,000A57C--222!Q;+H+H<KiKi+ilm+m%noG!3w{!C/667IJ)001CD+==@]@]]_#$(F(F#F!"KM &*O	vvo
 "?"??QJ^E^bcDcdllmomumuv"*a-),CCirxx8"#k/h6OR\6\!]o/A//RV[\kVl/l,0053IIJ/L?+l==AB<999;=OPF>>%++&&r(   c                .    [         R                  " U 5      $ r*   )matherfxs    r%   _erfr     s    88A;r(   c                6    [        U 5      [        U5      -  S-	  $ )N@   )r   )abs     r%   
_umulhi_64r     s     FSVO""r(   )otypesc                  $    \ rS rSr\S 5       rSrg)ExtraFunctionsi  c                x    [         R                  " UR                  R                  U R                  X5      U5      $ r*   )r   tensorbuildercreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding	_semantics       r%   _convert_custom_types$ExtraFunctions._convert_custom_types  s+    yy**::5<<fhnoor(   r=   N)r>   r?   r@   rA   staticmethodr   rG   r=   r(   r%   r   r     s    p pr(   r   c                     \ rS rSr\R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  \R
                  R                  0r\R                  R                  \R                  R                  \R                  R                  \R                  R                  \R                  R                   \R                  R                   \R                  R"                  \R                  R"                  \R                  R$                  \R                  R$                  \R                  R&                  \R                  R&                  \R                  R(                  \R                  R(                  \R                  R*                  \R                  R*                  \R                  R,                  \R                  R,                  \R                  R.                  \R                  R.                  0
rSS jrS rS rS rS rS rS rS	 r S
 r!S r"S r#S r$S r%S r&S r'S r(S r)S r*S r+S r,S r-S r.S r/S r0S r1S r2S r3S r4S r5S r6S  r7S! r8S" r9S# r:S$ r;S% r<S& r=S' r>S( r?S) r@S* rAS+ rBS, rCS- rDS. rES/ rFS0 rGS1 rHS2 rIS3 rJS4 rKS5 rLS6 rMS7 rNS8 rOS9 rPS: rQS; rRS< rSS= rTS> rUS? rVS@ rWSA rXSB rYSC rZSD r[SE r\SF r]SG r^SH r_SI r`SJ raSK rbSL rcSM rdSN reSO rfSP rgSQ rhSR riSS rjST rkSU rlSV rmSW rnSX roSY rpSZ rqS[ rrS\ rsS] rtS^ ruS_ rvS` rwSa rxSb rySc rzSd r{Se r|Sf r}\Lr~\LrSg rSh rSi rSj rSk rSl rSm rSn rSo rSp rSq rSr rSs rSt rSu rSv rSw rSx rSy rSz rS{ rS| rS} rS~ rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S     SS jjrSS jrSS jrSS jr  SS jrS rSrg)InterpreterBuilderi"  c                    S U l         [        5       U l        0 U l        [        R
                  U R                  S'   S U R                  S'   g )Nconvert_custom_typesc                    g)N)r   r   r   r=   )lhsTyperhsTypes     r%   <lambda>-InterpreterBuilder.__init__.<locals>.<lambda><  s    Ir(   min_dot_size)r   r   optionscodegen_fnsr   r   r#   s    r%   rR   InterpreterBuilder.__init__7  sB    	)+3A3W3W/0+M(r(   c                    XR                   S   :  d  [        S5      eX R                   S   :  d  [        S5      eX0R                   S   :  d  [        S5      eXU4U l        g )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dimr    grid_idxr$   r   yzs       r%   set_grid_idxInterpreterBuilder.set_grid_idx>  s^    ==##/00==##/00==##/00q	r(   c                    XU4U l         g r*   )r  )r$   nxnynzs       r%   set_grid_dimInterpreterBuilder.set_grid_dimG  s    r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_half_tyInterpreterBuilder.get_half_tyL      zzr(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_bf16_tyInterpreterBuilder.get_bf16_tyO      {{r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_float_tyInterpreterBuilder.get_float_tyR  r  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_double_ty InterpreterBuilder.get_double_tyU  r  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_int1_tyInterpreterBuilder.get_int1_tyX      wwr(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_int8_tyInterpreterBuilder.get_int8_ty[  r%  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_uint8_tyInterpreterBuilder.get_uint8_ty^      xxr(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_int16_tyInterpreterBuilder.get_int16_tya  r,  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_uint16_ty InterpreterBuilder.get_uint16_tyd      yyr(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_int32_tyInterpreterBuilder.get_int32_tyg  r,  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_uint32_ty InterpreterBuilder.get_uint32_tyj  r3  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_int64_tyInterpreterBuilder.get_int64_tym  r,  r(   c                "    [         R                  $ r*   )r   r^   r#   s    r%   get_uint64_ty InterpreterBuilder.get_uint64_typ  r3  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_fp8e4nv_ty!InterpreterBuilder.get_fp8e4nv_tys      }}r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_fp8e4b15_ty"InterpreterBuilder.get_fp8e4b15_tyv      ~~r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_fp8e4b8_ty!InterpreterBuilder.get_fp8e4b8_tyy  rC  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_fp8e5_tyInterpreterBuilder.get_fp8e5_ty|  r  r(   c                "    [         R                  $ r*   )r   r   r#   s    r%   get_fp8e5b16_ty"InterpreterBuilder.get_fp8e5b16_ty  rG  r(   c                .    [         R                  " X5      $ r*   )r   r   )r$   elt_ty
addr_spaces      r%   
get_ptr_tyInterpreterBuilder.get_ptr_ty  s    v22r(   c                .    [         R                  " X5      $ r*   )r   r   )r$   r   rM   s      r%   get_block_tyInterpreterBuilder.get_block_ty  s    }}U**r(   c                z    [        [        R                  " U/[        R                  S9[        R
                  5      $ NrU   )r   rV   arraybool_r   r   r$   r:   s     r%   get_int1InterpreterBuilder.get_int1  s$    BHHeWBHH=rwwGGr(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   	get_uint8InterpreterBuilder.get_uint8  $    BHHeWBHH=rxxHHr(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   get_int8InterpreterBuilder.get_int8  s$    BHHeWBGG<bggFFr(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   
get_uint16InterpreterBuilder.get_uint16  $    BHHeWBII>		JJr(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   	get_int16InterpreterBuilder.get_int16  rc  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   
get_uint32InterpreterBuilder.get_uint32  rj  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   	get_int32InterpreterBuilder.get_int32  rc  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r^   r   r]  s     r%   
get_uint64InterpreterBuilder.get_uint64  rj  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   	get_int64InterpreterBuilder.get_int64  rc  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   get_fp16InterpreterBuilder.get_fp16  $    BHHeWBJJ?LLr(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   get_fp32InterpreterBuilder.get_fp32  r}  r(   c                z    [        [        R                  " U/[        R                  S9[        R                  5      $ rZ  )r   rV   r[  r   r   r]  s     r%   get_fp64InterpreterBuilder.get_fp64  r}  r(   c                T    [        [        R                  " S/[        U5      S9U5      $ Nr   rU   )r   rV   r[  r   )r$   types     r%   get_null_value!InterpreterBuilder.get_null_value  s!    BHHaSd0CDdKKr(   c                    U R                   c  [        S5      e[        [        R                  " U R                   U   /[        R
                  S9[        R
                  5      $ )Nzgrid_idx is NonerU   )r  r    r   rV   r[  r   r   r$   axiss     r%   create_get_program_id(InterpreterBuilder.create_get_program_id  sD    == /00BHHdmmD&9%:"((KRXXVVr(   c                    [        [        R                  " U R                  U   /[        R                  S9[
        R                  5      $ rZ  )r   rV   r[  r  r   r   r  s     r%   create_get_num_programs*InterpreterBuilder.create_get_num_programs  s.    BHHdmmD&9%:"((KRXXVVr(   c                    [        [        R                  " UR                  [        S9[
        R                  5      nS nU R                  XXbX45      $ rZ  )r   rV   	ones_liker   r+   r   r   create_masked_load)r$   ptr_0_1is_volatilemaskothers          r%   create_loadInterpreterBuilder.create_load  s;    BLL>H&&s%RMMr(   c                    [        [        R                  " UR                  [        S9[
        R                  5      nU R                  XUS S 5      $ rZ  )r   rV   r  r   r+   r   r   create_masked_store)r$   r  valr  r  r  s         r%   create_storeInterpreterBuilder.create_store  s8    BLL>H''$dCCr(   c                   UR                  5       n[        U5      nUc)  [        [        R                  " UR
                  US9U5      n[        R                  " UR
                  UR
                  UR
                  U5      n	[        X5      $ rZ  )r2   r   r   rV   r   r   _interpreterload)
r$   ptrsr  r  cache_modifiereviction_policyr  ra   dtype_nprets
             r%   r  %InterpreterBuilder.create_masked_load  sg    &&( *= tyy!I8TE		499ejj(KC**r(   c                n    [         R                  " UR                  UR                  UR                  5      $ r*   )r  storer   )r$   r  r:   r  r  r  s         r%   r  &InterpreterBuilder.create_masked_store  s#    !!$))UZZCCr(   c                   UR                   R                  nUR                  nU[        R                  :X  a  U[        R                  :X  d(  U[        R                  :X  aX  U[        R                  :X  aD  [        UR                  X4S 5      R                  [        U5      5      n[        XRR                  5      $ [        UR                  R                  [        U5      5      UR                  5      $ r*   )r   r_   r   r   r   r   r   viewr   r   r]   )r$   srcdst_typesrc_element_typedst_element_typer   s         r%   	cast_implInterpreterBuilder.cast_impl  s    99++#??+0@BJJ0N

*/?2;;/N!#((,<PTUZZ[hiq[rsDoo66h0G H(//ZZr(   c                $    U R                  X5      $ r*   r  r$   r  r  s      r%   r   InterpreterBuilder.<lambda>      $..2Or(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    r  r(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    r  r(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    r  r(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    s    s0Mr(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    r  r(   c                $    U R                  X5      $ r*   r  )r$   r  r  	is_signeds       r%   r   r    s    T^^C=Zr(   c                    UR                   R                  nUR                  n[        UR                  XEU5      R	                  [        U5      5      n[        XbR                  5      $ r*   )r   r_   r   r   r  r   r   )r$   r  r  r   r  r  r   s          r%   r   "InterpreterBuilder.create_fp_to_fp  sP    99++#??chh(8MZ__`mnv`wxD//22r(   c                r    [        UR                  R                  [        U5      5      UR                  5      $ r*   )r   r   r  r   r_   r  s      r%   create_bitcast!InterpreterBuilder.create_bitcast  s%    CHHMM-*ABHOOTTr(   c                    U" UR                   UR                   5      nUR                  R                  n[        XE5      (       d  UR	                  [        U5      5      n[        XE5      $ r*   r   r   r_   r   r]   r   r   )r$   lhsrhsopr   r   s         r%   	binary_opInterpreterBuilder.binary_op  sN    CHHchh'99##%f77]]=#:;FF--r(   c                B    U R                  X[        R                  5      $ r*   r  rV   addr$   r  r  s      r%   r   r    s    "&&)Ir(   c                B    U R                  X[        R                  5      $ r*   r  rV   multiplyr  s      r%   r   r        "++)Nr(   c                B    U R                  X[        R                  5      $ r*   r  rV   divider  s      r%   r   r    s    ")))Lr(   c                B    U R                  X[        R                  5      $ r*   r  rV   fmodr  s      r%   r   r        "'')Jr(   c                B    U R                  X[        R                  5      $ r*   r  rV   subtractr  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r        s(Mr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    s    "))1Tr(   c                $    U R                  X5      $ r*   create_idivr  s      r%   r   r        )9)9#)Cr(   c                $    U R                  X5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    s    s(Hr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r     r  r(   c                B    U R                  X[        R                  5      $ r*   )r  rV   
left_shiftr  s      r%   r   r    s    s(Or(   c                B    U R                  X[        R                  5      $ r*   )r  rV   right_shiftr  s      r%   r   r    s    "..)Qr(   c                B    U R                  X[        R                  5      $ r*   r  rV   r   r  s      r%   r   r        $..2::*Nr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r        T^^Cbjj-Qr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r        DNN3RZZ,Pr(   c                B    U R                  X[        R                  5      $ r*   r  rV   r   r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r  	  r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r  
  r  r(   c                B    U R                  X[        R                  5      $ r*   r  rV   
less_equalr  s      r%   r   r        DNN3R]],Sr(   c                B    U R                  X[        R                  5      $ r*   r  rV   lessr  s      r%   r   r        DNN3RWW,Mr(   c                B    U R                  X[        R                  5      $ r*   r  rV   greater_equalr  s      r%   r   r        DNN3REUEU,Vr(   c                B    U R                  X[        R                  5      $ r*   r  rV   greaterr  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r	  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  rV   equalr  s      r%   r   r    s    4>>#BHH+Mr(   c                B    U R                  X[        R                  5      $ r*   r  rV   	not_equalr  s      r%   r   r    s    4>>#BLL+Qr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r	  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r        DNN3RXX,Nr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r        DNN3R\\,Rr(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r	  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r    r  r(   c                B    U R                  X[        R                  5      $ r*   r  r  s      r%   r   r     r  r(   c                B    U R                  X[        R                  5      $ r*   )r  rV   bitwise_andr  s      r%   r   r  !      s(Pr(   c                B    U R                  X[        R                  5      $ r*   )r  rV   bitwise_xorr  s      r%   r   r  "  r%  r(   c                B    U R                  X[        R                  5      $ r*   )r  rV   
bitwise_orr  s      r%   r   r  #  s    t~~c'Nr(   c                    [        UR                  [        R                  " UR                  UR                  5      -
  UR                  -  UR                  R
                  5      $ r*   )r   r   rV   r  r   r_   r  s      r%   r  InterpreterBuilder.create_idiv'  sC     SXX#(((CCPRUR[R[RbRbccr(   c                >   [        UR                  R                  5      n[        UR                  R                  5      nUR                  R                  U5      Ul        UR                  R                  U5      Ul        U R	                  X[
        R                  5      $ r*   )r   r   r   r]   r  rV   r  )r$   r  r  	lhs_dtype	rhs_dtypes        r%   create_ashrInterpreterBuilder.create_ashr-  sc    (8	(8	88??9-88??9-~~c77r(   c                V   UR                   R                  nU[        R                  :X  d  U[        R                  :X  a>  [        [        UR                   UR                   5      UR                  R                  5      $ [        [        SUR                  S-  S-   35      nUR                   R                  U5      nUR                   R                  U5      n[        R                  " XV5      UR                  S-  -	  n[        UR                  U5      UR                  R                  5      $ )Nr   r   r   )r   r   rV   r   r^   r   np_umulhi_u64r_   r   r!   r]   r  )r$   r  r  r   compute_dtypelhs_datarhs_dataret_datas           r%   create_umulhi InterpreterBuilder.create_umulhi5  s    BHH 2chh A399CSCSTT#B$u~~/AA/E.F(GHMxx}5Hxx}5H{{865>>A;MNH 6		8H8HIIr(   c                    U" UR                   UR                   UR                   5      nUR                  R                  n[        XV5      (       d  UR	                  [        U5      5      n[        XV5      $ r*   r  )r$   r  r  r  r  r   r   s          r%   
ternary_opInterpreterBuilder.ternary_opA  sT    CHHchh

3;;%%%f77]]=#:;FF--r(   c                D    U R                  XU[        R                  5      $ r*   )r:  rV   clip)r$   arglohipropagate_nanss        r%   r   r  J  s    doocWY[][b[b>cr(   c                D    U R                  XU[        R                  5      $ r*   )r:  rV   where)r$   condr  r  s       r%   r   r  K  s    CQSQYQY1Zr(   c                    [        UR                  UR                  -  UR                  -   UR                  R                  5      $ r*   r   r   r   r_   r	  s       r%   
create_fmaInterpreterBuilder.create_fmaM  s,    AFFQVVOaff4aggnnEEr(   c                b    [        U" UR                  5      UR                  R                  5      $ r*   rF  )r$   r>  r  s      r%   unary_opInterpreterBuilder.unary_opQ  s!    BsxxL#))*:*:;;r(   c                .   UR                   nUR                  S-
  n[        [        SUR                   35      nUR                  R                  U5      nSU-  S-
  nXV-  R                  [        U5      5      n[        XqR                   R                  5      $ )Nr   r   )	r   r"   r   rV   r   r  r   r   r_   )r$   r>  ra   mask_bitwidthnp_uint_dtyper   r  r  s           r%   create_fabsInterpreterBuilder.create_fabsT  s    99 33a7d8+F+F*G$HIxx}}]+]"a'{  x!89C!1!122r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   cosr$   r>  s     r%   r   r  ^      4==bff#=r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   exprS  s     r%   r   r  _  rT  r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   exp2rS  s     r%   r   r  `      DMM#rww$?r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   absrS  s     r%   r   r  a  s    DMM#rvv$>r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   floorrS  s     r%   r   r  b  s    T]]3%Ar(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   ceilrS  s     r%   r   r  c  rY  r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   logrS  s     r%   r   r  d  rT  r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   log2rS  s     r%   r   r  e  rY  r(   c                B    U R                  U[        R                  5      $ r*   rJ  rV   sqrtrS  s     r%   r   r  f  s    DMM#rww,Gr(   c                B    U R                  U[        R                  5      $ r*   re  rS  s     r%   r   r  g  rY  r(   c                B    U R                  U[        R                  5      $ r*   )rJ  rV   sinrS  s     r%   r   r  h  rT  r(   c                    UR                   R                  [        R                  :X  a  [	        UR                   5      O[        UR                   5      n[        X!R                  R                  5      $ r*   )r   r   rV   r   np_erf_fp32np_erf_fp64r   r_   )r$   r>  r  s      r%   
create_erfInterpreterBuilder.create_erfj  sF    '*xx~~'Ck#((#UXU]U]I^C!1!122r(   c                    [        S[        R                  " UR                  5      -  UR                  R
                  5      $ Nr   )r   rV   rf  r   r   r_   rS  s     r%   create_rsqrtInterpreterBuilder.create_rsqrtn  s+    A 113993C3CDDr(   c                t    [        UR                  R                  U5      UR                  R                  5      $ r*   )r   r   r\   r   r_   )r$   r>  rM   allow_reorders       r%   r   r  r  s(    \#((JZJZ[`Jacfclclcscs=tr(   c                    [        [        R                  " UR                  U5      UR                  R
                  5      $ r*   )r   rV   	transposer   r   r_   )r$   r>  perms      r%   create_transInterpreterBuilder.create_transt  s(    BLL48#)):J:JKKr(   c                   UR                   nUR                   nUR                  R                  S:X  a  UR                  R                  5       (       d9  UR                  R                  S:X  a  UR                  R                  5       (       a  [	        XaR                  [
        R                  S 5      R                  [        R                  5      n[	        XrR                  [
        R                  S 5      R                  [        R                  5      n[        [        R                  " XgUR                   R                  S9UR                   -   UR                  R                  5      $ )Nr   rU   )r   r   r"   is_floatingr   r   r   r  rV   r   matmulr_   )r$   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r%   
create_dotInterpreterBuilder.create_dotw  s    GG&&!+0C0C0E0EGG&&!+0C0C0E0E#FGGRZZFKKBJJWF#FGGRZZFKKBJJWFBIIfAFFLLIAFFRTUT[T[TbTbccr(   c                x    [        [        R                  " X#[        R                  S9[        R                  5      $ rZ  )r   rV   r[   r   r   )r$   ret_tystartstops       r%   create_make_range$InterpreterBuilder.create_make_range  s"    BIIeBBHHMMr(   c                T   Uc;  [        [        R                  " UR                  [        S9[
        R                  5      n[        R                  " UR                  UR                  R                  S9n[        R                  " UR                  UR                  [        R                  " UR                  5      5      n[        R                  " XSU4US9S   nUS==   [        R                  " UR                  5      R                  5       -  ss'   [        U[
        R                  5      $ )NrU   r   )binsrY   weights)r   rV   r  r   r+   r   r   r   rC  r   	histogramlogical_notsumr   )r$   r   r  r  dummy_weightsr  s         r%   create_histogram#InterpreterBuilder.create_histogram  s    <TYYd CRWWMD TYYdiiooF xx		499bmmDII.FGLLD	=YZ[\	!tyy15577Irxx00r(   c                    [        [        R                  " UR                  UR                  US9UR                  R
                  5      $ )Nr  )r   rV   take_along_axisr   r   r_   )r$   r  indicesr  s       r%   create_gather InterpreterBuilder.create_gather  s3    B..sxxDQSVS\S\ScScddr(   c                    UR                  5       nUR                  n[        SUS-  5      n[        UR                  XRR                  R                  [        R                  5      -  -   UR                  5      $ )Nr   r   )	r2   r"   maxr   r   r]   rV   r^   r   )r$   r  offsetra   element_bitwidthelement_bytewidths         r%   create_addptr InterpreterBuilder.create_addptr  sc    %%'#66#3q#89CHH'8;;;M;Mbii;X'XXZ]ZcZcddr(   c                   UR                  U5      u  pxUR                  5       n	[        U	5      n
Uc  S nOU[        R                  R
                  :X  a*  [        [        R                  " UR                  U
S9U	5      nO`U[        R                  R                  :X  a4  [        [        R                  " UR                  [        S5      U
S9U	5      nO[        SU 35      eU R                  XxXXV5      $ )NrU   nanzunsupported padding option )ri   r2   r   r   PADDING_OPTIONPAD_ZEROr   rV   r   r   PAD_NAN	full_likefloatr    r  )r$   r  r`   padding_optionr  r  r  r  rd   ra   r  r  s               r%   create_tensor_pointer_load-InterpreterBuilder.create_tensor_pointer_load  s    ..~>&&( *!Es11::: tyy!I8TEs11999 diiuX!VX`aE:>:JKLL&&tE?hhr(   c                N    UR                  U5      u  pgU R                  XbXtU5      $ r*   ri   r  )r$   r  r:   r`   r  r  r  rd   s           r%   create_tensor_pointer_store.InterpreterBuilder.create_tensor_pointer_store  s)    ..~>''UO\\r(   c                    [        [        R                  " UR                  U5      UR                  R
                  5      $ r*   )r   rV   expand_dimsr   r   r_   )r$   r>  r  s      r%   create_expand_dims%InterpreterBuilder.create_expand_dims  s(    BNN388T:CII<L<LMMr(   c                    [        [        R                  " UR                  U5      UR                  R
                  5      $ r*   )r   rV   rW   r   r   r_   )r$   r>  rM   s      r%   create_broadcast#InterpreterBuilder.create_broadcast  s(    BOOCHHe<cii>N>NOOr(   c                    [        [        R                  " UR                  UR                  /5      UR                  R
                  5      $ r*   )r   rV   concatenater   r   r_   r  s      r%   
create_catInterpreterBuilder.create_cat  s/    BNNCHHchh+?@#))BRBRSSr(   c                    [        [        R                  " UR                  UR                  /SS9UR                  R
                  5      $ )Nrt   r  )r   rV   stackr   r   r_   r  s      r%   create_joinInterpreterBuilder.create_join  s1    BHHchh%9CSYYEUEUVVr(   c                    [        UR                  S   UR                  R                  5      [        UR                  S   UR                  R                  5      4$ )N).r   ).r   rF  )r$   r  s     r%   create_splitInterpreterBuilder.create_split  sE    SXXf-syy/?/?@,sxxX^O_adajajaqaqBrssr(   c           	        UR                   n[        UR                  [        R                  5      (       aS  [        [        R                  " X2R                  S   [        UR                  5      S9UR                  R                  5      $ [        [        R                  " X2R                  [        UR                  5      S9UR                  R                  5      $ r  )rM   r   r   r   r   r   rV   fullr   r   r_   )r$   r  r>  rM   s       r%   create_splatInterpreterBuilder.create_splat  s    cii//xx{-PSPYPYBZ []`]f]f]m]mnnxx}SYY?W XZ]ZcZcZjZjkkr(   c           	         [        [        R                  " SUR                  S   [	        UR
                  5      S9UR
                  R                  5      $ )Nr   r   rU   )r   rV   r  r   r   r   r_   rS  s     r%   create_unsplat!InterpreterBuilder.create_unsplat  s:    BGGE388A;mCII>VWY\YbYbYiYijjr(   c                   X@R                   ;  a  [        SU 35      eU R                   U   n[        [        R                  " UR
                  UR
                  UR
                  U5      UR                  R                  5      $ )Nunsupported semantic )ir_sem_to_interpreter_semr    r   r  
atomic_casr   r   r_   )r$   r  cmpr  semscopes         r%   create_atomic_cas$InterpreterBuilder.create_atomic_cas  si    4444SE:;;,,S1L33CHHchhRUVX[XaXaXhXhiir(   c           	     \   XR                   ;  a  [        SU 35      eXPR                  ;  a  [        SU 35      eU R                   U   nU R                  U   n[        [        R
                  " XR                  UR                  UR                  U5      UR                  R                  5      $ )Nzunsupported rmwOp r  )	ir_rmw_op_to_interpreter_rmw_opr    r  r   r  
atomic_rmwr   r   r_   )r$   rmwOpr  r  r  r  r  s          r%   create_atomic_rmw$InterpreterBuilder.create_atomic_rmw  s    <<<1%9::4444SE:;;44U;,,S1L33E88SXXtyyZ]^`c`i`i`p`pqqr(   c                    [        S5      e)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r$   libNamelibPathsymbolargListretTypeisPures          r%   create_extern_elementwise,InterpreterBuilder.create_extern_elementwise  s    !"XYYr(   c                    [        S5      e)Nz,inline_asm not supported in interpreter moder  )r$   	inlineAsmconstraintsvaluesr  r  packs          r%   create_inline_asm$InterpreterBuilder.create_inline_asm      !"PQQr(   c                D   SU R                   S    SU R                   S    SU R                   S    S3nU(       a  USU 3-  nU(       a  [        R                  " SS	 0S
9  U H  n[        USUR                   3-   5        M      U(       a  [        R                  " S S
9  g g )N(r   z, r   r   ) r,   c                    SU S 3$ )N0x02xr=   r   s    r%   r   1InterpreterBuilder.create_print.<locals>.<lambda>  s    b3Lr(   )	formatter)r  rV   set_printoptionsprintr   )r$   prefixhexr  isSignedmsgr:   s          r%   create_printInterpreterBuilder.create_print  s    
 $--"#2dmmA&6%7r$--:J9K1MQvh<C52H*IJE#!EJJ<(() $/ r(   c                "    U(       d   U 5       eg r*   r=   )r$   	conditionmessages      r%   create_assert InterpreterBuilder.create_assert  s    &WI&yr(   c                     U(       d   S5       eg )NzAssume failedr=   )r$   r  s     r%   create_assume InterpreterBuilder.create_assume  s    )/)yr(   c                    g r*   r=   r#   s    r%   create_barrier!InterpreterBuilder.create_barrier  s    r(   c                d    U Vs/ s H  owR                  5       PM     nn[        XX8XV5      $ s  snf r*   )r6   rI   )	r$   rL   rM   rN   rO   rP   rQ   r  new_offsetss	            r%   create_make_block_ptr(InterpreterBuilder.create_make_block_ptr  s.    4;<G&||~G<!$w[XX =s   -c                   [        UR                  5      [        U5      :w  a  [        S5      eUR                   Vs/ s H  o3R                  5       PM     nn[	        UR
                  UR                  UR                  XAR                  UR                  5      n[        [        U5      5       H1  nUR                  U   =R                  X&   R                  -  sl        M3     U$ s  snf )Nz len(ptr.offsets) != len(offsets))rZ   rO   r    r6   rI   rL   rM   rN   rP   rQ   rY   r   )r$   r  rO   r  r  r  r   s          r%   create_advance!InterpreterBuilder.create_advance  s    s{{s7|+?@@47KK@K&||~K@ 399ckk;P_P_adajajks7|$AKKN7:??2 %
	 As   C#c                @    [        XX4U5      nUR                  5         U$ r*   )rl   ry   )r$   rL   rM   rN   tensor_shaper  rp   descs           r%   create_make_tensor_descriptor0InterpreterBuilder.create_make_tensor_descriptor  s    WGLr(   c           	        [        U[        5      (       d   eUR                  U5      u  pVUR                  5       n[	        U5      nUR
                  n	U	[        R                  R                  :X  a*  [        [        R                  " UR                  US9U5      n
O`U	[        R                  R                  :X  a4  [        [        R                  " UR                  [        S5      US9U5      n
O[!        SU	 35      eU R#                  XVXUSS9$ )NrU   r  zunsupported padding F)r  r  r  )r   rl   ri   r2   r   rp   r   r  r  r   rV   r   r   r  r  r  r    r  )r$   r  r  r  r  r  r  ra   r  rp   r  s              r%   create_descriptor_load)InterpreterBuilder.create_descriptor_load  s    $ 01111..w7
&&( *,,c((111 tyy!I8TE**222 diiuX!VX`aE3G9=>>&&t57FTY ' [ 	[r(   c                P    UR                  U5      u  pEU R                  XBUS S 5      $ r*   r  )r$   r  r:   r  r  r  s         r%   create_descriptor_store*InterpreterBuilder.create_descriptor_store#  s+    ..w7
''T4FFr(   c                   UR                   R                  R                  n[        U5      n[        R
                  " UR                  R                  S   UR                  S   /US9nS nS n	[        UR                  5       HC  u  p[        U[        R                  5      U/nU R                  XX5      R                  XzS S 24'   ME     [        Xu5      $ )Nr   rt   rU   )rL   r   r0   r   rV   zerosr   rM   rP   	enumerater   r   r   r  )r$   r  	x_offsetsy_offsetr  r   np_dtyperesultr  r  r   x_offsetr  s                r%   create_descriptor_gather+InterpreterBuilder.create_descriptor_gather'  s    		** '9>>//2D4D4DR4HIQYZ$Y^^4KA#Hbhh7BG66tnfkkFa4L 5 F**r(   c                    [        UR                  5       HV  u  pV[        UR                  U   UR                  5      n[        U[        R
                  5      U/nU R                  XU5        MX     g r*   )r  r   r   r   r   r   r  )	r$   r  r:   r  r  r   r  slicer  s	            r%   create_descriptor_scatter,InterpreterBuilder.create_descriptor_scatter2  sT    $Y^^4KA A<E#Hbhh7BG((g> 5r(   c                $   [        U5      nSUR                  ;   a*  [        [        R                  " SSUS9UR
                  5      $ U[        R                  :X  a*  [        [        R                  " SSUS9UR
                  5      $ [        SU 35      e)Nr   r   rt   rU   Tzunsupported type )r   namer   rV   r  r_   r\  	TypeError)r$   r  np_types      r%   get_all_ones_value%InterpreterBuilder.get_all_ones_value9  ss    %GLL 2W =t{{KK 4w ?MM/v677r(   )r   r  r  r  r  NreturnNone)zero)rL   r   rM   r}   rN   r}   r  r~   r  r+   rp   r   )r  rl   r  r}   )r  rl   r:   r   r  r}   )r  rl   r  r   r  r   )r  rl   r:   r   r  r   r  r   )r>   r?   r@   rA   r   MEM_SEMANTICACQUIREr  RELEASERELAXEDACQUIRE_RELEASEr  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr  rR   r  r  r  r  r  r   r#  r'  r*  r.  r1  r5  r8  r;  r>  rA  rE  rI  rL  rO  rT  rW  r^  ra  re  rh  rl  ro  rr  ru  rx  r{  r  r  r  r  r  r  r  r  r  r  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r  r  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intr  r/  r7  r:  create_clampfcreate_selectrG  rJ  rO  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinrm  rq  create_reshaperx  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r$  r*  rG   r=   r(   r%   r   r   "  s     ,";";"C"C  ,";";"C"C  ,";";"C"C((,*C*C*S*S	! 	<..22L//44<..22L//44<..22L//44<..22,--00<..22L//44'#N"%
3+HIGKIKIKIMMMLW
WN
D+D[ POOOOOOOMMOOZO3U. JKNKLKJKNKMJTCKCKJKJKHJMJOJQKNLNLQOPNNLNLQOPNSNMNVNPNSNMNVNPNMMQMMNPNSNVNNNRNMNPNSNVNNNRNPJPJNI&&d8	J. dMZMF<3 >J=J?K>KAL?K=J?KG?K=J3E uNLdN1$e
ei]NPTWtlkjrZR0'*Y
 `f4=JNY\[ G	+?,8?8r(   r   r   interpreter_semanticc                  6    \ rS rSrSrSS jrS	S jrSS jrSrg)
_LangPatchScopeiH  z2Tracks patched attributes so they can be restored.c                    / U l         g r*   _changesr#   s    r%   rR   _LangPatchScope.__init__K  s	    :<r(   c                v    [        X[        5      nU R                  R                  XU45        [	        XU5        g r*   )r   _MISSINGr  appendsetattr)r$   objr'  r:   originals        r%   r;   _LangPatchScope.set_attrN  s/    3h/c235!r(   c                    U R                   (       aR  U R                   R                  5       u  pnU[        L a  [        X5        O[	        XU5        U R                   (       a  MQ  g g r*   )r  popr  delattrr  )r$   r  r'  r  s       r%   restore_LangPatchScope.restoreS  sE    mm"&--"3"3"5Cx8#"8, mmmr(   r  Nr,  )r  objectr'  r   r:   r  r-  r.  )	r>   r?   r@   rA   rB   rR   r;   r  rG   r=   r(   r%   r  r  H  s    <="
-r(   r  c                6    US.S jnUR                  XU5        g )N)memberc           
         U " U0 UR                  5        VVs0 s H  u  p4US:w  d  M  X4_M     snnDS[        0D6$ s  snnf )Nr   )itemsr  )r  argskwargskvs        r%   r   _patch_attr.<locals>.<lambda>]  s[     :wMS\\^AVM[TQDEDT BFM[AV:w bv:wAVs   <<)r;   )r  r'  r  r   r  
new_members         r%   _patch_attrr  \  s    &, xJ 
NN3j)r(   c                    [         R                  " U 5       H8  u  p4[        R                  R	                  U5      (       d  M+  [        XXAU5        M:     g r*   )inspect
getmembersr   core
is_builtinr  )pkgr   r  r'  r  s        r%   _patch_builtinr  d  s:    **3/77f%%6E: 0r(   c                   ^ S mS nUR                  U SS 5        UR                  U SU4S j5        UR                  U SS 5        UR                  U S	S
 5        UR                  U S[        U5      5        g )Nc                h    U R                   R                  nUR                  S:X  a  [        U5      $ S$ )Nr   T)r   r   sizer+   )r$   r   s     r%   	_get_bool%_patch_lang_tensor.<locals>._get_booll  s,    {{ "YY!^tDz55r(   c                   [        [        R                  " U R                  R                  5      U R                  R
                  5      nU R                  R                  5       (       d   e[        U R                  R                  5      nUS   US   sUS'   US'   [        R                  R                  U R
                  U5      n[        R                  R                  X5      $ )Nrt   )r   rV   rv  r   r   r   r  is_blocklistrM   r   r  r   r   )r$   r   rP   res_tys       r%   _get_transpose*_patch_lang_tensor.<locals>._get_transposer  s    bll4;;+;+;<dkk>O>OPyy!!####499??++6r?KO(BR##DJJ<ww~~f--r(   	__index__c                \    [        U R                  R                  R                  5       5      $ r*   )r   r   r   squeezer#   s    r%   r   $_patch_lang_tensor.<locals>.<lambda>z  s    S9I9I9Q9Q9S5Tr(   r-   c                   > T" U 5      $ r*   r=   )r$   r  s    r%   r   r  {  s	    IdOr(   __repr__c                @    [        U R                  R                  5      $ r*   )reprr   r   r#   s    r%   r   r  |  s    D9I9I4Jr(   __str__c                @    [        U R                  R                  5      $ r*   )r   r   r   r#   s    r%   r   r  }  s    3t{{7G7G3Hr(   r   )r;   property)r   r  r  r  s      @r%   _patch_lang_tensorr  j  sh    6. 
NN6;(TU	NN6:'CD	NN6:'JK	NN69&HI	NN63 89r(   c                  8    \ rS rSrS rS rS rS rS rS r	Sr
g	)
ReduceScanOpInterfacei  c                    Xl         X l        g r*   r  
combine_fn)r$   r  r  s      r%   rR   ReduceScanOpInterface.__init__  s    	$r(   c                L    Ub!  U[        U5      :  a  [        SU SU 35      eg g )Nzaxis z out of bounds for shape )rZ   r    )r$   rM   r  s      r%   
check_axis ReduceScanOpInterface.check_axis  s4    E
 2uTF*CE7KLL !3r(   c                    U Hi  n[        U[        R                  R                  5      (       d  [	        S[        U5       35      eU R                  UR                  U R                  5        Mk     g )Nzinput must be a tensor, got )	r   r   r  r   r    r  r  rM   r  )r$   r   r>  s      r%   check_tensor"ReduceScanOpInterface.check_tensor  sN    Cc277>>22 #?S	{!KLLOOCIItyy1 r(   c                j   [        U5      n[        US5      (       aM  UR                  (       a<  UR                  U5      n[        R
                  " U[        UR                  5      5      nO[        R                  " U/US9nUn[        R                  R                  [        XR                  5      U5      $ )NrM   rU   )r   r1   rM   r]   r   r   r  rV   r[  r  r   r   r_   )r$   r  r   r  ret_types        r%   	to_tensorReduceScanOpInterface.to_tensor  sz     '3  SYY**X&C}}UDO<H((C51CHww~~l3=xHHr(   c                    [        S5      e)Nz,apply_impl must be implemented by subclassesr  r$   r   s     r%   
apply_impl ReduceScanOpInterface.apply_impl  r  r(   c                    [        U[        5      (       d  U R                  U45      S   $ U R                  U5        U R	                  U5      n[        U[
        [        45      (       a  [        U5      $ U4$ Nr   )r   tupleapplyr  r  r  )r$   r   r  s      r%   r  ReduceScanOpInterface.apply  sb    %''::ui(++% ooe$'dE];;uSzH#Hr(   r  N)r>   r?   r@   rA   rR   r  r  r  r  r  rG   r=   r(   r%   r  r    s$    %M2IRIr(   r  c                  J   ^  \ rS rSrU 4S jrS rS rS	S jrS rS r	Sr
U =r$ )
	ReduceOpsi  c                0   > [         TU ]  X5        X0l        g r*   )superrR   	keep_dims)r$   r  r  r  	__class__s       r%   rR   ReduceOps.__init__  s    *"r(   c                    / nU Hh  nUb  UR                  U5        M  SnUR                  U R                  UR                  R                  R	                  5       UR
                  5      5        Mj     [        U5      U4$ r  )r  r  r   r   flattenr   r  )r$   r   r  r  r   s        r%   unravelReduceOps.unravel  sg    D

4 

4>>$++*:*:*B*B*DdjjQR  Sz4r(   c                  ^ ^^^ T R                   nT R                  TT R                   5      u  mn/ n/ nTS   R                  R                  R                  nUSU XcS-   S  -   nT Hi  nUR                  UR                  R                  5        UR                  [        R                  " XxR                  R                  R                  S95        Mk     [        US   R                  5       GHf  n	[        R                  " X5      mTSU TUS-   S  -   m[        UUU 4S j[        U5       5       5      n
TU   S:X  aH  [        [        U5      5       H.  nX   R                  R                  R                  5       X[   T'   M0     M  [        UUU 4S j[        U5       5       5      nT R                   R"                  " / UQU
Q76 n[%        U[        5      (       d  U4OUn[        [        U5      5       H]  n[%        X   [&        R(                  R*                  5      (       a&  X   R                  R                  R                  5       OX   X[   T'   M_     GMi     / n[        U5       H  u  pT R,                  (       aM  Ub  [        R.                  " X5      nOF[        [        U5      5       H  n[        R.                  " US5      nM     OUc  UR                  5       nUR                  T R1                  UTU	   R                  5      5        M     U$ )Nr   r   rU   c              3  l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr*   r  r   ).0iir}  r   input_indexr$   s      r%   	<genexpr>+ReduceOps.generic_reduce.<locals>.<genexpr>  s1     s]rTYTVq~uRy O O]r   14c              3  l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr*   r  )r  oior   output_indexr$   s      r%   r  r    s1     !w`vW\WY$..<%)//"R"R`vr  )r  r  r   r   rM   r  rV   r  r   rY   r  unravel_indexr  r  rZ   ru   r  fnr   r   r  r   r  r  r  )r$   r   original_axisr  
input_dataoutput_datainput_shapeoutput_shaper>  r   input_tuplej	acc_tuplecombine_fn_retr  r   _r  r  s   ``               @@r%   generic_reduceReduceOps.generic_reduce  s   		ll5$))4t
Ahoo**00"1T*[-CCCcjjoo.rxxJJOO<Q<QRS  z!}))*A**1:K&q.TAXY1GGLs]fgq]rssK4 A%s;/0A3>>3H3H3M3M3R3R3TKN<0 1 "!w`iju`v!ww	!%!3!3!MY!M!M6@QV6W6W^.]k	s;/0AV`!bggnnW6 W69<3F3F3K3K3P3P3R;D<  N<0 1 +"  -GA~~ ,>>$5D"3{#34!~~dA6 5 &yy{JJt~~dE!HNN;< . 
r(   c                   [        U[        5      (       a  US   OUnS nS nU(       aJ  U R                  U" UR                  R                  U R
                  U R                  S9UR                  5      nU(       aN  U R                  U" UR                  R                  U R
                  U R                  S9[        R                  5      nUb  Ub  XE4$ Ub  U$ Ub  U$ [        S5      e)Nr   r  keepdimsz-val_reduce_op and idx_reduce_op are both None)r   r  r  r   r   r  r  r   r   r   r    )r$   r   val_reduce_opidx_reduce_opr  idxs         r%   min_maxReduceOps.min_max  s    &ue44a%..u||/@/@tyy[_[i[i!jlqlwlwxC..u||/@/@tyy[_[i[i!jlnltltuC?s8O_J_JLMMr(   c                    U R                  [        R                  " UR                  R                  U R
                  U R                  S9UR                  5      $ )Nr  )r  rV   r  r   r   r  r  r   r  s     r%   r  ReduceOps.sum  s<    ~~bffU\\%6%6TYYQUQ_Q_`bgbmbmnnr(   c                $   U R                   [        R                  R                  :X  a0  U R	                  US   [
        R                  [
        R                  S9$ U R                   [        R                  R                  :X  a0  U R	                  US   [
        R                  [
        R                  S9$ U R                   [        R                  R                  :X  a"  U R	                  US   [
        R                  S S9$ U R                   [        R                  R                  :X  a"  U R	                  US   [
        R                  S S9$ U R                   [        R                  R                  :X  a  U R!                  US   5      $ U R#                  U5      $ )Nr   )r  r	  )r  r   standard_argmin_combine_tie_break_leftr  rV   minargmin_argmax_combine_tie_break_leftr  argmax_elementwise_maxnanmax_elementwise_minnanmin_sum_combiner  r  r  s     r%   r  ReduceOps.apply_impl  s   ??bkkHHH<<abii<XX__ J JJ<<abii<XX__ < <<<<a		QU<VV__ < <<<<a		QU<VV__ 8 8888E!H%% &&u--r(   )r  r*   )r>   r?   r@   rA   rR   r  r  r  r  r  rG   __classcell__r  s   @r%   r  r    s)    # )VN$o. .r(   r  c                  @   ^  \ rS rSrU 4S jrS rS rS rS rSr	U =r
$ )ScanOpsi  c                0   > [         TU ]  X5        X0l        g r*   )r  rR   reverse)r$   r  r  r!  r  s       r%   rR   ScanOps.__init__  s    *r(   c                    U R                  [        R                  " UR                  R                  U R
                  S9UR                  S9/$ Nr  rU   )r  rV   cumsumr   r   r  r   r  s     r%   r%  ScanOps.cumsum  s8    ryy):):KSXS^S^_``r(   c                    U R                  [        R                  " UR                  R                  U R
                  S9UR                  S9/$ r$  )r  rV   cumprodr   r   r  r   r  s     r%   r(  ScanOps.cumprod  s8    rzz%,,*;*;$))LTYT_T_`aar(   c           	       ^ ^^^ / n/ nTS   R                   R                  R                  nT Hi  nUR                  UR                   R                  5        UR                  [        R
                  " XER                   R                  R                  S95        Mk     [        US   R                  5       GH  n[        R                  " Xd5      m[        UUU 4S j[        U5       5       5      nTT R                     S:X  aH  [        [        U5      5       H.  nXx   R                   R                  R                  5       X8   T'   M0     M  [        UU 4S j[        [        T5      5       5       5      m[        UUU 4S j[        U5       5       5      n	T R                  R                   " / U	QUQ76 n
[#        U
[        5      (       d  U
4OU
n	[        [        U5      5       H]  n[#        X   [$        R&                  R(                  5      (       a&  X   R                   R                  R                  5       OX   X8   T'   M_     GM     / n[        U5       H3  u  pgUR                  T R+                  UTU   R                  5      5        M5     U$ )Nr   rU   c              3  l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr*   r  )r  r  r}  indexr   r$   s      r%   r  'ScanOps.generic_scan.<locals>.<genexpr>  s/     fPeur%%)//BBPer  c              3  \   >#    U  H!  oTR                   :X  a  TU   S -
  OTU   v   M#     g7f)r   Nr  )r  r   r,  r$   s     r%   r  r-  #  s-     "kYjTU		>58a<uQx#OYjs   ),c              3  l   >#    U  H)  u  pTR                  UT   TU   R                  5      v   M+     g 7fr*   r  )r  r  r  r   
prev_indexr$   s      r%   r  r-  $  s1     !u^tUZUW$..:b	"P"P^tr  )r   r   rM   r  rV   r  r   rY   r  r  r  r  r  rZ   ru   r  r  r   r   r  r   r  )r$   r   r  r  rM   r>  r   r   r  r   r  r  r,  r0  s   ``          @@r%   generic_scanScanOps.generic_scan  s   
a$$**Ccjjoo.rxxZZ__5J5JKL  z!}))*A$$Q.EfPYZdPeffDTYY1$s;/0A,0GNN,?,?,D,D,FKN5) 1 #"kY^_bch_iYj"kk
!!u^ghs^t!uu	!%!3!3!FY!F!F6@QV6W6W^.]k	s;/0AOY!bggnnP6 P6IL,?,?,D,D,I,I,K;D<  N5) 1 +"  -GAJJt~~dE!HNN;< .
r(   c           
        / nU R                   (       af  U H_  nUR                  U R                  [        R                  " UR
                  R                  U R                  S9UR                  5      5        Ma     OUnU R                  [        R                  R                  :X  a  U R                  US   5      nONU R                  [        R                  R                  :X  a  U R                  US   5      nOU R!                  U5      nU R                   (       aK  U HE  n[        R                  " UR
                  R                  U R                  S9UR
                  l        MG     U$ )Nr  r   )r!  r  r  rV   flipr   r   r  r   r  r   r  r  r%  _prod_combiner(  r1  )r$   r   	new_inputr>  r  s        r%   r  ScanOps.apply_impl0  s    	<<  

dii0XZ]ZcZc!de  I??bkk666++il+C__ 9 99,,y|,C ##I.C<<"$''#**//		"J

 
r(   )r!  )r>   r?   r@   rA   rR   r%  r(  r1  r  rG   r  r  s   @r%   r  r    s#    ab< r(   r  c                    SS jnSS jnU R                  [        SU5        U R                  [        SU5        U R                  [        R                  SU5        U R                  [        R                  SU5        g )Nc                8    [        XU5      R                  U 5      $ r*   )r  r  )r   r  r  r  r  s        r%   _new_reduce'_patch_reduce_scan.<locals>._new_reduceH  s    95;;EBBr(   c                8    [        XU5      R                  U 5      $ r*   )r  r  )r   r  r  r!  r  s        r%   	_new_scan%_patch_reduce_scan.<locals>._new_scanK  s    t177>>r(   reduceassociative_scan)F)r;   r   r  )r  r:  r=  s      r%   _patch_reduce_scanrA  D  sY    C? 
NN2x-	NN2)95	NN277Hk2	NN277.	:r(   c           	        S nSS jnSS jnS nUR                  U SU5        UR                  U SU5        UR                  U SU5        UR                  U S[        5        UR                  U R                  S	U5        UR                  U S
[        USS95        UR                  U S[        USS95        UR                  U S[        USS95        [	        U5        g )Nc                `   U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR	                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S	:X  a  UR                  5       $ U R                   S
:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR                  5       $ U R                   S:X  a  UR!                  5       $ U R                   S:X  a  UR#                  5       $ [%        SU  S35      e)Nvoidr   r   r   r   r   r   r   r   r^   r   r   r   fp16bf16fp32fp64zfail to convert z to ir type)r'  get_void_tyr#  r'  r*  r.  r1  r5  r8  r;  r>  rL  rA  rE  r  r  r  r   r    )r$   r   s     r%   
_new_to_ir$_patch_lang_core.<locals>._new_to_irV  s   99&&((YY& &&((YY& &&((YY'!''))YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY)#))++YY*$**,,YY& &&((YY& &&((YY& ''))YY& ((**+D6=>>r(   c                6    Uc  SnUc  SU pTOXpT[        XEU5      $ )Nr   r   )rY   )arg1arg2stepr  r  ends         r%   
_new_range$_patch_lang_core.<locals>._new_range~  s*    <D<D33U&&r(   c                     U (       d   U5       eg r*   r=   )rD  r  s     r%   _new_static_assert,_patch_lang_core.<locals>._new_static_assert  s    Str(   c                   [        U [        R                  5      (       d  U $ [        U[        [        45      (       d  U/OUnU Vs/ s H0  n[        U[        R
                  5      (       a  UR                  OUPM2     nn[        U5      [        S[        U R                  5      5      :w  a  [        SU 35      eU R                  R                  X!5        U $ s  snf )Nr   z$len(values) != len(input.shape) for )r   r   r   r  r  	constexprr:   rZ   r  rM   r    r   r;   )r   r  r'  r  s       r%   	_set_attr#_patch_lang_core.<locals>._set_attr  s    %++L!+FT5M!B!B&IOPAZ2<<88!''a?Pv;#aU[[!122CD6JKKd+	 Qs   7CrY   static_rangestatic_assertstatic_printto_irmultiple_ofztt.divisibility)r'  max_contiguousztt.contiguitymax_constancyztt.constancy)NN) )r;   r  r   r   rA  )langr  rJ  rQ  rT  rX  s         r%   _patch_lang_corerc  T  s    $?P'
 
NN4*-	NN44	NN4*<=	NN4/	NN4::w
3	NN4	@Q(RS	NN4)79?+ST	NN4').*QRur(   c                h   [        5       nU R                  R                  5        VVs/ s H@  u  p#[        R                  " U5      (       d  M"  U[
        [
        R                  4;   d  M>  UPMB     nnn[        U5      S:  d   S5       eU Hu  n[        U[        U5        [        UR                  [        U5        U[
        :X  a  [        UR                  [        U5        [        UR                  U5        [        XQ5        Mw     [        [
        R                  R                  [        U5        U$ s  snnf )Nr   z:triton.language must be visible from within jit'd function)r  __globals__r  r  ismoduler   r  rZ   r  interpreter_builderr   r   r  rc  tensor_descriptor_base)r  r  r  r:   langsrb  s         r%   _patch_langrj    s    E#%>>#7#7#9p#9xqW=M=Me=TUY^cegigngnboYoU#9Epu:?XXX?t0%8t{{$7?2:499&95A4;;.%  277113FNL qs   !D.D.)D.c                   [        U [        5      (       Ga  [        R                  " [        R
                  R                  R                  U 5      S 5      n[        R                  nSU s=::  a  S:  a  O  O[        R                  nOqSU s=::  a  S:  a  O  O[        R                  nOPSU s=::  a  S:  a  O  O[        R                  nO/SU s=::  a  S:  a  O  O[        R                  nO[        SU  35      e[        [        R                  " U /US9U5      n[        R                   " X15      $ [#        U S	5      (       a  [        R                  " [        R
                  R                  R                  U 5      S 5      n[        [        R                  " U R%                  5       /[        R                  S9U5      n[        R                   " X15      $ [        U [&        5      (       a  [)        U [+        [,        U 5      5      $ [        U [.        5      (       a  U R0                   Vs/ s H  n[-        U5      PM     nnU R0                  S
   S:X  d   e[        R2                  " S5      US
'   [4        R7                  [-        U R8                  5      U R:                   Vs/ s H  n[-        U5      PM     snUU R<                   Vs/ s H  n[        R2                  " U5      PM     snU R>                  S9$ U $ s  snf s  snf s  snf )Ni   l        l        l         l            l            zUnsupported integer value rU   data_ptrrt   r   )rL   rM   rN   rP   r  ) r   r   r   	str_to_tytritonruntimejitmangle_typerV   r   r   r   r^   r    r   r[  r   r1   rl  r  r   map_implicit_cvtr   rN   rW  r  make_tensor_descriptorrL   rM   rP   rp   )r>  tyr   r   srN   r   s          r%   rs  rs    s2   #s\\&..,,88=tDS 5 HHEc!E!IIEs"U"HHEc!E!IIE9#?@@bhhuE:B?yy$$sJ\\&..,,88=tDbhh'7ryyI2Nyy$$	C		S#mS"9::	C)	*	*-0[[9[=#[9{{2!###ll1o#::chh@W\_\e\eAf\eWX-PQBR\eAfpwadapapGqap\]UVapGqJM++ ; W 	W J : BgGqs    K.K3
< K8c                    [        U [        R                  R                  R                  5      (       a  U R
                  $ U $ r*   )r   rn  ro  rp  TensorWrapperrL   )ts    r%   _unwrap_tensorrz    s-    !V^^''5566vvHr(   c                    [        U[        R                  R                  R                  5      (       a3  [        R                  R                  R	                  XR
                  5      $ U $ r*   )r   rn  ro  rp  rx  r   )ry  original_tensors     r%   _rewrap_tensorr}    sE    /6>>#5#5#C#CDD~~!!//3H3HIIHr(   c                  2    \ rS rSr/ 4S jrS rS rS rSrg)GridExecutori  c                   SSK Jn  Xl        X l        X0l        X@l        UR                  R                  5        VVs0 s H  u  pgXe" U5      _M     nnnU Vs/ s H  ohR                  U5      S:X  d  M  UPM     snU l	        g s  snnf s  snf )Nr   )_normalize_tyrW  )
rp  r  r  	arg_namesgridpre_run_hooksrC   r  get
constexprs)	r$   r  r  r  r  r  r'  ru  rC   s	            r%   rR   GridExecutor.__init__  s{    &"	*CECUCUC[C[C]^C]xt4r!22C]^,5bID9L9LT9RVa9a4Ib _bs   BB3Bc                   ^^	 0 m	UU	4S jmU Vs/ s H  nT" U5      PM     nn0 nUR                  5        H  u  pgT" U5      XV'   M     XE4$ s  snf )Nc                  > [        U [        5      (       a  [        U [        TU 5      5      $ [        U [        5      (       aR  [	        T" U R
                  5      U R                  U R                  U R                  U R                  U R                  5      $ [        U S5      (       d  U $ [        U 5      nUR                  5       R                  5       T;  a1  UR                  5       nUR                  5       TUR                  5       '   TUR                  5       R                  5          nUR!                  SSS9nUR#                  X!R%                  5       UR'                  5       UR)                  5       5        [+        X0S9nU$ )Nrl  r   cpu)device)r|  )r   r  r   rr  r   rL   rM   rN   rP   rp   round_f32_to_tf32r1   rz  untyped_storagerl  r  	new_emptyset_storage_offsetr  rw   r}  )r>  unwrapped_argstoragecpu_arg_to_cpustoragess       r%   r  ,GridExecutor._init_args_hst.<locals>._to_cpu  s;   #u%%$S#gs*;<<C!122'CHH%IIKKOOKK))  S*--
*3/M,,.779I'779/6{{}))+,}<<>GGIJG#--a->GLL">">"@-BTBTBVXeXlXlXno$WBGNr(   )r  )
r$   args_devr  r>  args_hst
kwargs_hstr9   r:   r  r  s
           @@r%   _init_args_hstGridExecutor._init_args_hst  sY    	4 -55HSGCLH5 
 ,,.JC%enJO )## 6s   Ac                   ^
^ 0 mU
U4S jm
[        X5       H  u  pVT
" XV5        M     UR                  5        H  u  pxXG   n	T
" X5        M     TR                  5        H  u  pVUR                  U5        M     g )Nc                  > [        U S5      (       aU  [        U 5      [        U5      pU R                  5       UR                  5       4TU R                  5       R                  5       '   g [	        U [
        5      (       a  [        X5       H  u  pT" X5        M     g [	        U [        5      (       a  T" U R                  UR                  5        g g )Nrl  )	r1   rz  r  rl  r   r  zipr   rL   )arg_devarg_hst	_from_cpur  s     r%   r  1GridExecutor._restore_args_dev.<locals>._from_cpu  s    w
++#1'#:N7<SBIBYBYB[]d]t]t]vAw002;;=>GU++*-g*?&Wg/ +@G%566',,5 7r(   )r  r  r  copy_)r$   r  r  r  r  r  r  r9   	kwarg_dev	kwarg_hstr  r  s             @@r%   _restore_args_devGridExecutor._restore_args_dev  sn    		6 !$H 7Gg' !8 %llnNC"Ii+ - #+//"3WMM'" #4r(   c                   [         R                  " U R                  5      nUR                  5        VVs0 s H  u  pEXCR                  ;   d  M  XE_M     nnnU R                  X5      u  pgU R                   H  nU" U0 UD6  M     [        U R                  5      n	 [         R                  " U R                  /UQ70 UD6n
U
R                  5        VVs0 s H"  u  pXU R                  ;   a  UO
[        U5      _M$     n
nn[        U R                  5      (       a  U R                  U
5      OU R                  n[        U5      S::  d   S5       eUSS[        U5      -
  -  -   n[        R                  " U6    [!        US   5       HU  n[!        US   5       H@  n[!        US   5       H+  n[        R#                  XU5        U R                  " S0 U
D6  M-     MB     MW      U	R3                  5         U R5                  XX'5        g s  snnf s  snnf ! [$         aD  n[&        R(                  R*                  R,                  (       a  e [/        [1        U5      5      UeS nAff = f! U	R3                  5         f = f)N   z#grid must have at most 3 dimensionsr  r   r   r   r=   )r  getfullargspecr  r  r  r  r  rj  getcallargsr  rs  callabler  rZ   rg  r  rY   r  	Exceptionrn  knobscompilationfront_end_debuggingr   r  r  r  )r$   r  r  argspecr  r  r  r  hookpatch_scoper  r'  r>  r  r   r
  r  es                     r%   __call__GridExecutor.__call__)  s    ((1#)<<>G>41Q,,5F$!$>G#228D&&D()j) ' "$''*	" &&twwHHZHDbfblblbnobnU^UYD!8#mC>PPbnDo&.tyy&9&9499T?tyyDt9>H#HH>%1s4y=11D,,d3	7tAwA"47^!&tAwA/<<Q1E GGOdO "0 , ( !x6F? H p  7<<++??&tAw/Q67
 !sI   G=G=7I )H:A7I 2A'H	 I 	
I?III I,)r  r  r  r  r  N)	r>   r?   r@   rA   rR   r  r  r  rG   r=   r(   r%   r  r    s    :< c#$J#2$Gr(   r  c                      \ rS rSrS rSrg)ASTTransformeriP  c           	        / nUR                    H  nX R                  U5      /-  nM     [        U5      S:  a  [        S5      e[        R
                  " [        R                  " [        R                  " S[        R                  " 5       S9S[        R                  " 5       S9UR                  [        R                  " SS9// S	9Ul	        U$ )
Nr   z&Multiple assignments are not supportedr  )idctxr  )r:   r   r  F)r:   )funcr  keywords)targetsvisitrZ   r    astCall	AttributeNameLoadr:   Constant)r$   nodenamestargets       r%   visit_AssignASTTransformer.visit_AssignR  s    llFjj())E #u:>EFF XXSXX1GSXXZ%X_j#&88:/6:jj#,,UZB[5\gik
 r(   r=   N)r>   r?   r@   rA   r  rG   r=   r(   r%   r  r  P  s    r(   r  c                  L    \ rS rSr\" 5       rS rS rS rS r	S r
S rS rS	rg
)FunctionRewriteri`  c                8    Xl         X l        SU l        SU l        g )Nra  r   )r  r  filenamedef_file_lineno)r$   r  r  s      r%   rR   FunctionRewriter.__init__c  s    $%r(   c                L    [         R                  " U R                  5      u  pU R	                  5       u  U l        U l        U R                  U5      U l        U R                  U5      nU R                  U5      nU R                  U5      $ ! [         a    U R                  s $ f = fr*   )r  getsourcelinesr  r  _get_jit_fn_file_liner  r  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r$   linesr  r  transformed_asts        r%   rewrite_astFunctionRewriter.rewrite_astj  s    	--dgg6HE /3.H.H.J+t+../""5)--c2%%o66  	77N	s   "B
 
B#"B#c                B    SSK JnJn  U" U" U R                  5      5      $ )Nr   )get_jit_fn_file_lineJITFunction)rp  r  r  r  )r$   r  r  s      r%   r  &FunctionRewriter._get_jit_fn_file_line~  s    :#K$899r(   c                    Sn[        U5       H0  u  p4UR                  5       R                  S5      (       d  M+  US-   nM2     U$ )Nr   zdef r   )r  strip
startswith)r$   r  r  r   lines        r%   r  FunctionRewriter._find_def  s@    
 'GAzz|&&v..U
 ( r(   c                r    XR                   S-
  S  nSR                  U5      n[        R                  " U5      $ )Nr   ra  )r  jointextwrapdedent)r$   r  r  s      r%   r   FunctionRewriter._prepare_source  s2    oo)*+ggens##r(   c                    [         R                  " U5      nU R                  R                  U5      n[         R                  " U5        U R
                  S-
  n[         R                  " X45        U$ rp  )r  parseast_transformerr  fix_missing_locationsr  increment_lineno)r$   r  
parsed_astr  
inc_linenos        r%   r  FunctionRewriter._transform_ast  sY     YYs^
..44Z@!!/2))A-
_9r(   c                   [        XR                  SS9n0 U R                  EnU R                  R                  n[        5       R                  5        H  u  pVXT;  d  M  XdU'   M     [        X$U5        X0R                  R                     $ )Nexec)r  mode)	compiler  r  r  re  globalsr  r  r>   )r$   r  compiled_codelocal_namespace
fn_globalsr9   r:   s          r%   r  "FunctionRewriter._compile_and_exec  so    --fU)T[[/WW((
!)//+JC$"'3 , 	]8ww//00r(   )r  r  r  r  r  N)r>   r?   r@   rA   r  r  rR   r  r  r  r  r  r  rG   r=   r(   r%   r  r  `  s-    $&O&7(:$
	1r(   r  c                  V    \ rS rSr% 0 rS\S'   SS jrS rS rS r	\
S 5       r S	 rS
rg)InterpretedFunctioni  zDict[Callable, Callable]rewritten_fnc                    Xl         [        U40 UD6U l        X l        / U l        [
        R                  " U5      nUR                  R                  5        Vs/ s H  oDR                  PM     snU l
        g s  snf r*   )r  r  rewriterr  r  r  	signature
parametersr  r'  r  )r$   r  r  r  r  s        r%   rR   InterpretedFunction.__init__  sa    (6v6%%b)	*3*>*>*E*E*GH*GQ&&*GHHs   A5c               ~    U(       a  g U R                  5       n[        XPR                  XR                  5      " U0 UD6$ r*   )rewriter  r  r  )r$   r  warmupr  r  r  s         r%   runInterpretedFunction.run  s5    \\^B6H6HI4ZSYZZr(   c                ^    [        U5      (       d   eU R                  R                  U5        g r*   )r  r  r  )r$   r  s     r%   add_pre_run_hook$InterpretedFunction.add_pre_run_hook  s$    ~~~!!$'r(   c                    U R                   U R                  ;  a1  U R                  R                  5       U R                  U R                   '   U R                  U R                      $ r*   )r  r  r   r  r#   s    r%   r  InterpretedFunction.rewrite  sJ    77$+++)-)B)B)DDdgg&  ))r(   c                .    U R                   R                  $ r*   )r  r>   r#   s    r%   r>   InterpretedFunction.__name__  s    wwr(   c                    [        U R                  5        U R                  5       n U" U0 UD6$ ! [         a  n[	        [        U5      5      UeS nAff = fr*   )rj  r  r  r  r   r  )r$   r  r  r  r  s        r%   r  InterpretedFunction.__call__  sO    DGG\\^	3t&v&& 	3"47+2	3s   / 
AAA)r  r  r  r  r   Nr,  )r>   r?   r@   rA   r  rC   rR   r  r
  r  r  r  rG   r=   r(   r%   r  r    s<    -/L*/I[(*
    3r(   r  )r  r  )Q
__future__r   r  r  r  typingr   r   r   r   r   r	   r   numpyrV   rn  triton.languagelanguager   rD   r
   triton.language.semanticr   triton.runtime.jitr   triton.tools.tensor_descriptorr   errorsr   	functoolsr   _C.libtritonr   r  r   r   _utilsr   r   r   rI   rl   r   r   r   r   r   r   r   	vectorizer   rk  r   rl  r^   r2  r   r   r  r  rg  r  rC   r  r  r  r  r  r  r  rA  rc  rj  rs  rz  r}  r  NodeTransformerr  r  r  r=   r(   r%   <module>r      s   " 
   A A      ! 3 . ; $  6 $ "CL   @" "4(" ("V $
& 
& 
& 	@='@
# ll45ll45Z<p p^8 ^8B 8(* '56I'J n J- -(*;:."I "IJ].% ].@;# ;|; K\ BnG nGbS((  B1 B1J'3/!, '3r(   