o
    n0j`"                     @   sh  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd  Z)d!d" Z*d#d$ Z+d%d& Z,d'd( Z-d)d* Z.d+d, Z/d-d. Z0d/d0 Z1d1d2 Z2d3d4 Z3dTd5ee fd6d7Z4dUd8ej5d9eeej6f fd:d;Z7d8e8d9ej9fd<d=Z:d9e8fd>d?Z;d@dA Z<dVdCdDZ=dEdF Z>dVdGdHZ?ej@jAe=  e? dIZBdJeCdKeCfdLdMZDdNeej6ejEjFjf d9ej6fdOdPZGdUdQeee8  fdRdSZHdS )W    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8Zint16Zint32int64)uint8Zuint16uint32uint64)Zfloat16float32float64bfloat16Zfloat8_e4m3fnZfloat8_e5m2boolr   >   r   r   r   c                   C   s   t jdddkS )NZTRITON_INTERPRET01)osenvironget r   r   _/home/wildlama/miniconda3/envs/lam_a2e/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r   c                   C   s   t  rd S tjjj S N)r   tritonruntimeZdriverZactiveget_current_targetr   r   r   r   r       s   r    c                  C      t  } | d u r	dS | jdkS )NFcudar    backendtargetr   r   r   is_cuda$      r'   c                   C      t  otj d dkS )Nr      r'   torchr"   get_device_capabilityr   r   r   r   is_ampere_or_newer)      r.   c                   C      t  otj d dkS )Nr   
   r+   r   r   r   r   is_blackwell-   r/   r2   c                   C   r)   Nr   	   r+   r   r   r   r   is_hopper_or_newer1   r/   r5   c                   C   r0   r3   r+   r   r   r   r   	is_hopper5   r/   r6   c                   C   r0   )Nr      r+   r   r   r   r   is_sm12x9   r/   r8   c                  C   r!   )NFhipr#   r%   r   r   r   is_hip=   r(   r:   c                  C   "   t  } | d uo| jdko| jdkS )Nr9   Zgfx90ar    r$   archr%   r   r   r   is_hip_cdna2B      r>   c                  C   r;   )Nr9   Zgfx942r<   r%   r   r   r   is_hip_cdna3G   r?   r@   c                  C   r;   )Nr9   Zgfx950r<   r%   r   r   r   is_hip_cdna4L   r?   rA   c                  C   "   t  } | d uo| jdkod| jv S )Nr9   Zgfx11r<   r%   r   r   r   is_hip_gfx11Q   r?   rC   c                  C   rB   )Nr9   Zgfx12r<   r%   r   r   r   is_hip_gfx12V   r?   rD   c                  C   rB   )Nr9   Zgfx1250r<   r%   r   r   r   is_hip_gfx1250[   r?   rE   c                   C   s   t  pt pt S r   )r>   r@   rA   r   r   r   r   is_hip_cdna`   r   rF   c                   C   s   t  rdS dS )Ni  i   )rA   r   r   r   r   get_hip_lds_sized   s   rG   c                  C   r!   )NFZxpur#   r%   r   r   r   is_xpuh   r(   rH   c                  C   s   t  } | d u r	dS t| jS )N )r    strr=   r%   r   r   r   get_archm   r(   rK   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   Zint1Zbool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shapeZ	dtype_strrL   lowhighrY   rO   xr   r   r   numpy_randomr   s,   


*rg   rf   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerQ   r   r   )rO   namerW   lstripr`   rZ   rX   r	   r,   Ztensortlr   )rf   rj   Zdst_typetZsigned_type_nameZx_signedr   r   r   	to_triton   s   
ro   c                 C   s   t t|  d S r   )rm   Z	str_to_tyr
   rf   r   r   r   str_to_triton_dtype   s   rq   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$rP   znot a triton or torch dtype: )rT   r   languagerO   rk   r,   rematchrJ   group	TypeErrortype)rO   mr   r   r   torch_dtype_name   s   
ry   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rT   r   basecpunumpyr`   rZ   rX   ry   rO   r,   Tensorr   float
ValueErrorrp   r   r   r   to_numpy   s   
 r   Fc                 C   sl   t  rdS t s
dS tjjj}| rdnd}ttt|	d}t
|dks)J |tj d dko5||kS )	NTF)r7   r   )r7      .   r   r4   )r   r'   r   ZnvidiaZptxasversiontuplemaprU   splitlenr,   r"   r-   )
byval_onlyZcuda_versionZmin_cuda_versionZcuda_version_tupler   r   r   supports_tma   s   
r   c                   C   s&   t  rdS t s
dS tj d dkS )NTFr   r4   )r   r'   r,   r"   r-   r   r   r   r   supports_ws   s
   r   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )r   r   r   r   tma_skip_msg   s   r   )reasonsizealignc                 C   s   t j| t jddS )Nr"   )rO   rj   )r,   emptyr   )r   r   _r   r   r   default_alloc_fn   r   r   rn   c                 C   s   t | tjjjr| jS | S r   )rT   r   r   jitr   rz   )rn   r   r   r   unwrap_tensor   s   r   skipped_attrc                    st   ddl m d u rt t fddj D g  j fdd} fdd}||fS )	Nr   r   c                    s4   i | ]\}}t | jr| jkr|vr||qS r   )rT   Z
base_knobs).0rk   knobset)r   r   r   r   
<dictcomp>   s    
z%_fresh_knobs_impl.<locals>.<dictcomp>c                     sj     D ]+\} }t| |   |j D ]}|jtjv r(j	|jdd q 
|j qqd_S )NF)ZraisingT)itemssetattrcopyresetZknob_descriptorsvalueskeyr   r   Zdelenvappendpropagate_env)rk   r   Zknob)env_to_unsetr   	knobs_mapmonkeypatchr   r   fresh_function   s   z)_fresh_knobs_impl.<locals>.fresh_functionc                     sL     D ]
\} }t| | q   D ]}|tjv r tj|= q_d S r   )r   r   Zundor   r   r   )rk   r   k)r   r   r   r   prev_propagate_envr   r   reset_function  s   

z)_fresh_knobs_impl.<locals>.reset_function)r   r   setpytestZMonkeyPatch__dict__r   r   )r   r   r   r   )r   r   r   r   r   r   r   _fresh_knobs_impl   s   	r   )NNNr   )F)Ir   rs   r|   rX   r,   r   Ztriton.languagerr   rm   r   typingr   r   r   r   Znumpy.randomr   Ztriton.runtime.jitr   r	   r
   rV   rW   Zintegral_dtypesr^   Zfloat_dtypes_with_bfloat16ZdtypesZdtypes_with_bfloat16Ztorch_float8_dtypesZtorch_dtypessortedr   Z
tma_dtypesr   r    r'   r.   r2   r5   r6   r8   r:   r>   r@   rA   rC   rD   rE   rF   rG   rH   rK   rg   Zndarrayr}   ro   rJ   rO   rq   ry   r   r   r   r   markZskipifZrequires_tmarU   r   r   r   r   r   r   r   r   r   <module>   sh    

 

$