o
    h                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+dAd'ee fd(d)Z,dBd*ej-d+eeej.f fd,d-Z/d*e0d+ej1fd.d/Z2d+e0fd0d1Z3d2d3 Z4dCd5d6Z5dCd7d8Z6e
j7j8e5  e6 d9Z9d:e:d;e:fd<d=Z;d>eej.ej<j=jf d+ej.fd?d@Z>dS )D    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8Zint16Zint32int64)uint8Zuint16uint32uint64)Zfloat16float32float64bfloat16Zfloat8_e4m3fnZfloat8_e5m2boolr   >   r   r
   r   c                   C   s   t jdddkS )NZTRITON_INTERPRET01)osenvironget r   r   ^/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r   c                   C   s   t  rd S tjjj S N)r   tritonruntimeZdriverZactiveget_current_targetr   r   r   r   r      s   r   c                  C      t  } | d u r	dS | jdkS )NFcudar   backendtargetr   r   r   is_cuda$      r%   c                   C   s   t  otj d dkS )Nr   	   )r%   torchr    get_device_capabilityr   r   r   r   	is_hopper)   s   r*   c                  C   r   )NFhipr!   r#   r   r   r   is_hip-   r&   r,   c                  C   "   t  } | d uo| jdko| jdkS )Nr+   Zgfx90ar   r"   archr#   r   r   r   is_hip_cdna22      r0   c                  C   r-   )Nr+   Zgfx942r.   r#   r   r   r   is_hip_cdna37   r1   r2   c                  C   r-   )Nr+   Zgfx950r.   r#   r   r   r   is_hip_cdna4<   r1   r3   c                  C   s,   t  } t| j | d uo| jdkod| jv S )Nr+   Zgfx12)r   printr/   r"   r#   r   r   r   is_hip_gfx12A   s   
r5   c                   C   s   t  pt pt S r   )r0   r2   r3   r   r   r   r   is_hip_cdnaG   r   r6   c                  C   r   )NFZxpur!   r#   r   r   r   is_xpuK   r&   r7   c                  C   s   t  } | d u r	dS t| jS )N )r   strr/   r#   r   r   r   get_archP   r&   r:   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   Zint1Zbool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shapeZ	dtype_strr;   lowhighrH   r>   xr   r   r   numpy_randomU   s,   


*rV   rU   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicer@   r   r   )r>   namerF   lstriprO   rI   rG   r   r(   Ztensortlr   )rU   rY   Zdst_typetZsigned_type_nameZx_signedr   r   r   	to_tritons   s   
r^   c                 C   s   t t|  S r   )r\   Z	str_to_tyr   rU   r   r   r   str_to_triton_dtype   s   r`   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$r?   znot a triton or torch dtype: )rC   r   languager>   rZ   r(   rematchr9   group	TypeErrortype)r>   mr   r   r   torch_dtype_name   s   
rh   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rC   r   basecpunumpyrO   rI   rG   rh   r>   r(   Tensorr   float
ValueErrorr_   r   r   r   to_numpy   s   
 ro   Fc                 C   sl   t  rdS t s
dS tjjj}| rdnd}ttt|	d}t
|dks)J |tj d dko5||kS )	NTF)   r   )rp      .   r   r'   )r   r%   r   ZnvidiaZptxasversiontuplemaprD   splitlenr(   r    r)   )
byval_onlyZcuda_versionZmin_cuda_versionZcuda_version_tupler   r   r   supports_tma   s   
rz   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )ry   r   r   r   tma_skip_msg   s   r{   )reasonsizealignc                 C   s   t j| t jddS )Nr    )r>   rY   )r(   emptyr	   )r}   r~   _r   r   r   default_alloc_fn   r   r   r]   c                 C   s   t | tjjjr| jS | S r   )rC   r   r   jitr   ri   )r]   r   r   r   unwrap_tensor   s   r   )NNNr   )F)?r   rb   rk   rG   r(   r   Ztriton.languagera   r\   r   ZpytestZnumpy.randomr   typingr   r   Ztriton.runtime.jitr   r   r   rE   rF   Zintegral_dtypesrM   Zfloat_dtypes_with_bfloat16ZdtypesZdtypes_with_bfloat16Ztorch_float8_dtypesZtorch_dtypessortedsetZ
tma_dtypesr   r   r%   r*   r,   r0   r2   r3   r5   r6   r7   r:   rV   Zndarrayrl   r^   r9   r>   r`   rh   ro   rz   r{   markZskipifZrequires_tmarD   r   r   r   r   r   r   r   r   <module>   sV    

 

(