o
    ø»ÎhþM  ã                   @   sâ   d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
d„Zdd„ Zdd„ ZeddG dd„ dƒƒZG dd„ deƒZdS )é    )ÚBaseBackendÚ	GPUTargetÚLanguage)ÚirÚpassesÚllvmÚamd)Úknobs)Ú	dataclass)ÚAnyÚDictÚTuple)Ú
ModuleTypeN)ÚPathÚtargetc                 C   s   dd„ S )Nc                 S   s   dS )N©é   r   r   © )Zlhs_typeZrhs_typer   r   úb/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/triton/backends/amd/compiler.pyÚ<lambda>   s    z"get_min_dot_size.<locals>.<lambda>r   ©r   r   r   r   Úget_min_dot_size   s   r   c                 C   ó   t jjd u r
| dkS t jjS ©NÚgfx942)r	   r   Úuse_block_pingpong©Úarchr   r   r   Úis_pingpong_schedule_enabled   ó   r   c                 C   r   r   )r	   r   Zuse_in_thread_transposer   r   r   r   Úis_in_thread_transpose_enabled   r   r    T)Úfrozenc                   @   s&  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	e
ed	< d
Zeed< dZeed< dZeed< dZeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zeed#< d$d%„ Zd&d'„ Z dS )(Ú
HIPOptionsé   Ú	num_warpsr   Úwaves_per_eué   Ú
num_stagesÚnum_ctasNÚextern_libsr   Úcluster_dimsFÚdebugTÚsanitize_overflowr   )Úfp8e5Úsupported_fp8_dtypesr   Ú!deprecated_fp8_dot_operand_dtypesÚieeeÚdefault_dot_input_precision)r0   Úallowed_dot_input_precisionsÚenable_fp_fusionÚlaunch_cooperative_gridr   Úmatrix_instr_nonkdimÚkpackÚallow_flush_denormÚmax_num_imprecise_acc_defaultÚhipÚbackend_nameÚnoneÚschedule_hintc                 C   sÒ   t | jdd… ƒ}|dkrdnd}t | d|¡ | jdkr'| j| jd @ dks+J d	ƒ‚| jd
kr9| jdks9J dƒ‚ttƒjd }| j	d u rGi nt
| j	ƒ}dD ]}t||› d ƒ||< qNt | dt| ¡ ƒ¡ d S )Né   éþÿÿÿé
   é    é@   Ú	warp_sizer   r   znum_warps must be a power of 2Úgfx950zgfx950 only accepts kpack == 1Úlib)ZocmlZocklz.bcr)   )Úintr   ÚobjectÚ__setattr__r$   r6   r   Ú__file__Úparentr)   ÚdictÚstrÚtupleÚitems)ÚselfZ	gfx_majorrB   Údefault_libdirr)   rD   r   r   r   Ú__post_init__E   s    ÿ
zHIPOptions.__post_init__c                 C   s.   d  dd„ | j ¡ D ƒ¡}t | d¡¡ ¡ S )NÚ_c                 S   s   g | ]\}}|› d |› ‘qS )ú-r   )Ú.0ÚnameÚvalr   r   r   Ú
<listcomp>V   s    z#HIPOptions.hash.<locals>.<listcomp>úutf-8)ÚjoinÚ__dict__rM   ÚhashlibÚsha256ÚencodeÚ	hexdigest)rN   Úkeyr   r   r   ÚhashU   s   zHIPOptions.hash)!Ú__name__Ú
__module__Ú__qualname__r$   rE   Ú__annotations__r%   r'   r(   r)   rJ   r*   rL   r+   Úboolr,   r   rK   r.   r   r/   r1   r2   r3   r4   r5   r6   r7   r8   r:   r<   rP   r_   r   r   r   r   r"      s0   
 r"   c                       s  e Zd Zedefdd„ƒZdeddf‡ fdd„Zdefdd	„Zde	fd
d„Z
dd„ Zdd„ Zdeeef fdd„Zdd„ Zedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZed d!„ ƒZed"d#„ ƒZed$d%„ ƒZed&d'„ ƒZd(d)„ Ze ¡ d*d+„ ƒZ‡  ZS ),Ú
HIPBackendr   c                 C   s
   | j dkS )Nr9   )Úbackendr   r   r   r   Úsupports_target\   s   
zHIPBackend.supports_targetÚreturnNc                    s&   t ƒ  |¡ t|jtƒsJ ‚d| _d S )NÚhsaco)ÚsuperÚ__init__Ú
isinstancer   rK   Z
binary_ext)rN   r   ©Ú	__class__r   r   rk   `   s   
zHIPBackend.__init__c                 C   s   d|j › S )Núhip:r   ©rN   Úoptionsr   r   r   Úget_target_namee   s   zHIPBackend.get_target_namec                    sú   dt jjp| jji}| jjdkr#ttjƒ}| dh¡ t	t
|ƒƒ|d< dˆ vr]ttjƒ}| jjdkr:| h d£¡ n| jjdkrH| dd	h¡ nd
| jjv rU| dd	h¡ t	t
|ƒƒ|d< dˆ vrgt jj|d< | ‡ fdd„tj ¡ D ƒ¡ tdi |¤ŽS )Nr   r   Ztf32r2   r.   >   Zfp8e4b8Úfp8e4nvZfp8e5b16rC   rs   r-   Zgfx12r3   c                    s*   i | ]}|ˆ v rˆ | d ur|ˆ | “qS ©Nr   )rS   Úk©Úoptsr   r   Ú
<dictcomp>}   s
    ÿ
ÿz,HIPBackend.parse_options.<locals>.<dictcomp>r   )r	   ZruntimeZoverride_archr   r   Úsetr"   r2   ÚupdaterL   Úsortedr.   ÚlanguageZdefault_fp_fusionÚ__dataclass_fields__Úkeys)rN   rw   Úargsr2   r.   r   rv   r   Úparse_optionsh   s$   

zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r   r&   )r$   r(   Úsharedr*   )rN   Úmetadatar   r   r   Úpack_metadata   s   úzHIPBackend.pack_metadatac                 C   s   dt | jƒiS )NZmin_dot_size)r   r   rp   r   r   r   Úget_codegen_implementation‹   ó   z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )Ú	libdeviceztriton.language.extra.libdevice)Ztriton.language.extra.hipr†   )rN   r†   r   r   r   Úget_module_mapŽ   s   zHIPBackend.get_module_mapc                 C   s   t  |¡ d S rt   )r   Úload_dialects)rN   Úctxr   r   r   rˆ   “   r…   zHIPBackend.load_dialectsc                 C   sL   dd l }d}t| dƒr|  ¡ |kS t| |jƒr$t| dƒr$|  ¡  ¡ |kS dS )Nr   iÿÿÿÚ	ptr_rangeÚuntyped_storageF)ÚtorchÚhasattrrŠ   rl   ZTensorr‹   Úsize)ÚargrŒ   Z
MAX_INT_32r   r   r   Úis_within_2gb–   s   
zHIPBackend.is_within_2gbc                 C   s$   t  | ¡}d| v r|ddgg7 }|S )NÚSztt.pointer_ranger@   )r   Ú
parse_attr)ZdescÚretr   r   r   r’   ¡   s   
zHIPBackend.parse_attrc                 K   s:   t j| |fi |¤Ž}tjjr|dkrt | ¡r|d7 }|S )NZtensorr‘   )r   Úget_arg_specializationr	   r   Úuse_buffer_opsre   r   )r   ÚtyÚkwargsr“   r   r   r   r”   ¨   s   z!HIPBackend.get_arg_specializationc                  C   sn   t jj} | d urt| ƒ}| ¡ r|S ttƒjd }| ¡ r|S tdƒ}| ¡ r)|S tdƒ}| ¡ r3|S tdƒ‚)Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r	   r   Zlld_pathr   Úis_filerH   rI   Ú	Exception)Zlld_env_pathZlldr   r   r   Úpath_to_rocm_lld±   s   zHIPBackend.path_to_rocm_lldc                 C   sš   t  | j¡}| ¡  tj |¡ tj |¡ tj 	|¡ tj 
|¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ | | ¡ | S rt   )r   Úpass_managerÚcontextÚenable_debugr   ÚcommonÚadd_inlinerÚttirZadd_rewrite_tensor_pointerZ(add_rewrite_tensor_descriptor_to_pointerÚadd_canonicalizerZadd_combineZadd_reorder_broadcastÚadd_cseÚadd_triton_licmÚadd_symbol_dceZadd_loop_unrollÚrun)Úmodr‚   rq   Úpmr   r   r   Ú	make_ttirÅ   s   
zHIPBackend.make_ttirc                 C   sŒ  t  | j¡}| ¡  tj |d|j› |j|j	|j
¡ | | ¡ t  | j¡}| ¡  tj |¡ tj |¡ tj |¡ tjj ||j|j|j¡ tj |¡ tjj |¡ tj |d¡ tjj |¡ tj |¡ tj |¡ tj |¡ tj |¡ tjj}tjj}tjj}|jdkr‘d }}tjj  ||j!|||¡ |r¨tjj "||j¡ tj |¡ |j #¡ dkr¾tjj $||j¡ tj |d¡ tj |¡ tj %|¡ t&|jƒrãtjj '|¡ tj |¡ tjj (|¡ t)|jƒ}|rÿ|j!dkrÿtjj *||j!¡ tjj+rtjj ,|¡ tj |¡ tjj -||j¡ tjj .|¡ tj |¡ tj /|¡ tj 0|¡ |r?tjj 1||j¡ | | ¡ | S )Nro   Tzlocal-prefetchr   r;   r&   )2r   r›   rœ   r   r   r    Zadd_convert_to_ttgpuirr   r$   rB   r(   r¥   ÚttgpuirZadd_coalesceZadd_remove_layout_conversionsZadd_optimize_thread_localityr   Zadd_accelerate_matmulr5   r6   Zadd_optimize_epilogueZadd_optimize_dot_operandsZadd_hoist_layout_conversionsZadd_fuse_nested_loopsrž   r¡   r£   r	   Úglobal_prefetchÚlocal_prefetchÚuse_async_copyr<   Zadd_stream_pipeliner'   Zadd_coalesce_async_copyÚlowerZinsert_instruction_sched_hintsZadd_reduce_data_duplicationr    Zadd_in_thread_transposeZadd_reorder_instructionsr   Zadd_block_pingpongr•   Zadd_canonicalize_pointersZadd_convert_to_buffer_opsZadd_fold_true_cmpir¢   r¤   Zadd_update_async_wait_count)r¦   r‚   rq   r§   rª   r«   r¬   r   r   r   r   Ú
make_ttgirÖ   sj   ÿ





zHIPBackend.make_ttgirc                 C   sb   | }t  |j¡}| ¡  tj |¡ tj |¡ tj	 
|¡ tj |¡ tj |¡ | |¡ |S rt   )r   r›   rœ   r   r   r©   rŸ   rž   Zadd_sccpr    Zadd_loop_aware_cser¡   Z add_combine_tensor_select_and_ifr¥   )Úsrcr‚   rq   r¦   r§   r   r   r   Ú	ttgir_opt  s   
zHIPBackend.ttgir_optc                    s&  | }t  |j¡}| ¡  d}tjj ||j|¡ tj	 
|¡ tj	 |¡ tj |¡ d}tjj ||j|¡ tj |¡ tj |¡ tj	 |¡ tj	 |¡ tj |¡ tj |¡ tj |¡ |j ¡ dkrrtjj ||j|j¡ tjjs|tj |¡ tjj ||¡ | |¡ t  ¡  t ¡ }t !||¡‰ t "ˆ ¡ d}tjj#r¤d}t $ˆ tj%|j|¡ t &ˆ |j¡ t 'ˆ d¡ t (ˆ dd¡ t (ˆ d	d¡ t (ˆ d
d¡ t (ˆ d|j)dk¡ dd„ ˆ  *¡ D ƒ}	|	d  +tj,¡ |	d  -dd|j.|j) › ¡ |	d  -d|j/› ¡ |j0r
dnd}
|	d  -d|
¡ tjj#r&|	d  1d¡ |	d  2¡  t 3|	d ¡ tjj#rQt4t5ƒj6d }t7|d ƒt7|d ƒt7|d ƒg}t 8ˆ |¡ n|j9re‡ fdd„|j9D ƒ}t 8ˆ |¡ t :ˆ tj;|jdg |j<¡ tjj=r~t >|	d ¡ |  ?d¡|d< t @ˆ ¡ t Aˆ ¡ t7ˆ ƒS )Nr   Tr;   Ú ú+xnackiô  Z__oclc_finite_only_optFZ__oclc_correctly_rounded_sqrt32Z__oclc_unsafe_math_optZ__oclc_wavefrontsize64rA   c                 S   s   g | ]}|  ¡ s|‘qS r   )Zis_declaration)rS   Úfnr   r   r   rV   `  s    z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr0   zdenormal-fp-math-f32rD   z
asanrtl.bczocml.bczockl.bcc                    s    g | ]\}}t  ˆ |¡r|‘qS r   )r   Zneed_extern_lib)rS   rT   Úpath©Zllvm_modr   r   rV   €  s     z
ttg.sharedr   )Br   r›   rœ   r   r   r   r©   Zadd_optimize_lds_usager   ÚconvertZadd_scf_to_cfZadd_index_to_llvmirZadd_allocate_shared_memoryZadd_to_llvmirrž   r¡   r¢   Zadd_cf_to_llvmirZadd_arith_to_llvmirr¤   r<   r­   Zlower_instruction_sched_hintsr'   r	   ÚcompilationZdisable_line_infoZllvmirZadd_di_scopeZadd_builtin_func_to_llvmirr¥   r   Zinit_targetsZ	to_moduleZattach_target_tripleÚenable_asanZattach_datalayoutÚTARGET_TRIPLEZset_isa_versionZset_abi_versionZset_bool_control_constantrB   Zget_functionsZset_calling_convZCALLING_CONV_AMDGPU_KERNELZadd_fn_attrr$   r%   r7   Zadd_fn_target_featureZadd_fn_asan_attrZset_all_fn_arg_inregr   rH   rI   rK   Zlink_extern_libsr)   Zoptimize_moduleZOPTIMIZE_O3r3   Zscalarize_packed_fopsZ#add_scalarize_packed_fops_llvm_passZget_int_attrZcleanup_bitcode_metadataZdisable_print_inline)r¯   r‚   rq   r¦   r§   Zcustom_lds_sizeZ_HIPBackend__HIP_FTZrœ   Útarget_featuresZfnsZdenormal_moderO   Úpathsr   rµ   r   Ú	make_llir#  s€   






ý


zHIPBackend.make_llirc              	   C   sx   t  d| ¡}t|ƒdksJ ‚|d |d< g }|jdkr | d¡ t | tj|j	d||j
d¡}tjjr:td	ƒ t|ƒ |S )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rT   Z	attentionzsink-insts-to-avoid-spillsr±   Fz!// -----// AMDGCN Dump //----- //)ÚreÚfindallÚlenr<   Úappendr   Ztranslate_to_asmr   r¹   r   r3   r	   Zdump_amdgcnÚprint)r¯   r‚   rq   ÚnamesÚflagsÚamdgcnr   r   r   Úmake_amdgcn‘  s   

zHIPBackend.make_amdgcnc                 C   s  d}t jjrd}t | |j|¡}t ¡ }t 	¡ h}t 	¡ 1}t
|jdƒ}| |¡ W d   ƒ n1 s4w   Y  t |ddd|jd|jg¡ W d   ƒ n1 sQw   Y  t
|jdƒ}	|	 ¡ }
W d   ƒ n1 skw   Y  W d   ƒ |
S W d   ƒ |
S 1 sƒw   Y  |
S )	Nr±   r²   Úwbz-flavorZgnuz-sharedz-oÚrb)r	   r·   r¸   r   Zassemble_amdgcnr   re   rš   ÚtempfileÚNamedTemporaryFileÚopenrT   ÚwriteÚ
subprocessÚ
check_callÚread)r¯   r‚   rq   rº   ri   Z	rocm_pathZtmp_outZtmp_inZfd_inZfd_outr“   r   r   r   Ú
make_hsaco§  s,   

ÿý
ÿ
ûþ
ûùzHIPBackend.make_hsacoc                    s†   |t jkr‡ ‡fdd„|d< ‡ ‡fdd„|d< n|t jkr&‡ ‡fdd„|d< ‡ ‡fdd„|d< ‡ ‡fd	d„|d
< ‡ ‡fdd„|d< d S )Nc                    ó   ˆ  | |ˆ ¡S rt   )r¨   ©r¯   r‚   ©rq   rN   r   r   r   º  ó    z'HIPBackend.add_stages.<locals>.<lambda>r    c                    rÐ   rt   )r®   rÑ   rÒ   r   r   r   »  rÓ   Zttgirc                    rÐ   rt   )r°   rÑ   rÒ   r   r   r   ½  rÓ   c                    rÐ   rt   )r¼   rÑ   rÒ   r   r   r   ¾  rÓ   Zllirc                    rÐ   rt   )rÅ   rÑ   rÒ   r   r   r   ¿  rÓ   rÄ   c                    rÐ   rt   )rÏ   rÑ   rÒ   r   r   r   À  rÓ   ri   )r   ZTRITONZGLUON)rN   Zstagesrq   r|   r   rÒ   r   Ú
add_stages¸  s   

zHIPBackend.add_stagesc                 C   s&   t jt ¡ dgdd}|› d| j› S )Nz	--versionrW   )ÚencodingrR   )rÌ   Úcheck_outputre   rš   r   )rN   Úversionr   r   r   r_   Â  s   zHIPBackend.hash) r`   ra   rb   Ústaticmethodr   rg   rk   rK   rr   r   r€   rƒ   r„   r   r   r‡   rˆ   r   r’   r”   rš   r¨   r®   r°   r¼   rÅ   rÏ   rÔ   Ú	functoolsÚ	lru_cacher_   Ú__classcell__r   r   rm   r   re   Z   sB    







=

m


re   )Ztriton.backends.compilerr   r   r   Ztriton._C.libtritonr   r   r   r   Ztritonr	   Údataclassesr
   Útypingr   r   r   Útypesr   rZ   rÈ   r½   rÌ   rÙ   Úpathlibr   r   r   r    r"   re   r   r   r   r   Ú<module>   s$    <