o
    hT                     @  s  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddlm
Z
mZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZd dlZdZ de iZ!dZ"de"iZ#dd Z$G dd dZ%G dd dZ&e' dd Z(e' dd Z)dd Z*d1dd Z+G d!d" d"Z,d2d#d$Z-d3d)d*Z.G d+d, d,Z/G d-d. d.e0Z1G d/d0 d0Z2dS )4    )annotationsN   )get_cache_invalidating_env_varsir)backends)Language)BaseBackend	GPUTarget)__version__knobs)OutOfResources)get_cache_managerget_dump_managerget_override_manager)driver)get_sass)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                 C  sP   t d| }t d| }|d urdS t dd| } |d ur&dt|d S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1Z	nvTmaDescz {[^}]+} *   )researchsubconvert_type_reprgroup)xmatchZtma r   ^/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/triton/compiler/compiler.pyr   '   s   r   c                   @  s0   e Zd ZddddZdd Zdd	 Zd
d ZdS )	ASTSourceNreturnNonec                 C  s   || _ tj| _d| _|j| _|| _t | _	|d ur<|
 D ]\}}t|tr-|j|fn|}t|ts6J || j	|< q|p@t | _t| jtrXdd t| jdD | _d S | j D ]}t|tshtdq]d S )Nttirc                 S  s   i | ]	\}}||  qS r   )strip.0kvr   r   r   
<dictcomp>D       z&ASTSource.__init__.<locals>.<dictcomp>,zSignature keys must be string)fnr   TRITONlanguageext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrs	enumeratesplitkeys	TypeError)selfr,   r2   Z
constexprsr;   r'   r(   r   r   r   __init__6   s&    
zASTSource.__init__c                   sz   dd t | j D }dd  d fddt | j D }| jj dt| j d| d| }t	
|d S )Nc                 S  s   g | ]\}}|qS r   r   r%   r   r   r   
<listcomp>K   s    z"ASTSource.hash.<locals>.<listcomp>c                 S  s   t | dr| jS t| S )N	cache_key)hasattrrC   r7   )r   r   r   r   <lambda>L       z ASTSource.hash.<locals>.<lambda>-c                   s   g | ]\}} |qS r   r   r%   get_keyr   r   rB   M   rF   utf-8)sortedr2   r5   joinr4   r,   rC   r7   r;   hashlibsha256encode	hexdigest)r@   Z
sorted_sigZconstants_keykeyr   rH   r   hashJ   s
   "$zASTSource.hashc                 C  s"   ddl m} || j| ||||dS )Nr   )ast_to_ttir)contextoptionscodegen_fns
module_map)Zcode_generatorrS   r,   )r@   rU   rV   rW   rT   rS   r   r   r   make_irQ   s   zASTSource.make_irc                 C  s   t  S N)r3   r@   r   r   r   parse_optionsV   s   zASTSource.parse_optionsNNr!   r"   r0   
__module____qualname__rA   rR   rX   r[   r   r   r   r   r    4   s
    r    c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
IRSourcec           
      C  s   || _ t|}|jdd  | _tj| _| | _t	
| |
| | jdkrStt| j | jtj}|d| _|d}tt| j |}dd t|D | _d S t	| j || _| j }d| | _| j|}| j|}	dd t|	D | _d S )Nr   r   r   c                 S  s   i | ]	\}}|t |qS r   )r   r&   r'   tyr   r   r   r)   l   r*   z%IRSource.__init__.<locals>.<dictcomp>@c                 S  s   i | ]\}}||qS r   r   rb   r   r   r   r)   s   s    )pathr   suffixr/   r   r-   r.   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r1   findallarg_type_patternr<   r2   parse_mlir_modulemoduleZget_entry_func_nameZget_functionZget_function_signature)
r@   re   rT   backendr   r2   typesfn_nameZfuncOpZfunc_tyr   r   r   rA   \   s&   






zIRSource.__init__c                 C  s   t | jd S )NrJ   )rM   rN   rh   rO   rP   rZ   r   r   r   rR   u   s   zIRSource.hashc                 C  s   || j _| j S rY   )ro   rT   )r@   rU   rV   rW   rT   r   r   r   rX   x   s   zIRSource.make_irc                 C  s4   | j dkr| jd}|d usJ dd|iS t S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r/   ro   Zget_int_attrr3   )r@   rt   r   r   r   r[   |   s
   
zIRSource.parse_optionsNr^   r   r   r   r   ra   Z   s
    ra   c               
   C  s  dd l } tjtjtjt}g }ttd}|t|	 
 g7 }W d    n1 s0w   Y  tj|ddftj|ddfg}|D ]6\}}| j|g|dD ])}t|j|jjd}|t|	 
 g7 }W d    n1 syw   Y  qUqIt }tdd	d
 }	ttj|dd|	 d}	 |	d}
|
sq||
 qW d    n1 sw   Y  ||
  tj|d}| j|gddD ])}t|j|jjd}|t|	 
 g7 }W d    n1 sw   Y  qt d| S )Nr   rbcompilerztriton.compiler.r   ztriton.backends.)prefix
EXT_SUFFIX.Z_Cz
libtriton.Ti   r.   ztriton.language.rG   )pkgutilosre   dirnameabspath__file__openrM   rN   readrP   rL   walk_packagesmodule_finder	find_specr1   origin	sysconfigget_config_varr=   updateappendr
   )r{   ZTRITON_PATHcontentsfZpath_prefixesre   rw   libZlibtriton_hashr/   chunkZlanguage_pathr   r   r   
triton_key   sF   

r   c                 C  s   t jj| d S )Nmax_shared_mem)r   activeutilsZget_device_properties)devicer   r   r   r      s   r   c                 C  sj   |dks|dkrt | |}||_|S |dks|dks|dkr%t|  S |dks-|dkr3t|  S d S )Nr#   rs   Zllirr   Zamdgcncubinhsaco)r   rn   rT   r   rg   
read_bytes)	full_namer/   rT   ro   r   r   r   parse   s   r   eBaseExceptionc                   s   t jjrdS | jdurt| j | jdurt| j ddg}dd |D }| j g } durEt fdd|D s>|   j	  dus.t
||dd D ]\}}||_	qN|s]d| _dS d|d	 _	|d
 | _dS )z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.pyc                 S  s   g | ]	}| d tjqS )/)replacer|   sep)r&   Zbad_filer   r   r   rB      r*   z$filter_traceback.<locals>.<listcomp>c                 3  s$    | ]} j jj|r|V  qd S rY   )tb_framef_codeco_filenameendswith)r&   r   tbr   r   	<genexpr>   s   " z#filter_traceback.<locals>.<genexpr>r   rz   r   )r   compilationZfront_end_debugging	__cause__filter_traceback__context____traceback__anyr   tb_nextzip)r   Z	BAD_FILESframesZ	cur_frameZ
next_framer   r   r   r      s.   






r   c                   @  s4   e Zd ZdddZdddZdd	d
ZdddZdS )CompileTimerr!   r"   c                 C  s    t   | _d | _g | _d | _d S rY   )timestartir_initialization_endlowering_stage_endsstore_results_endrZ   r   r   r   rA      s   

zCompileTimer.__init__c                 C  s   t   | _d S rY   )r   r   rZ   r   r   r   finished_ir_initialization   s   z'CompileTimer.finished_ir_initialization
stage_namer7   c                 C  s   | j |t f d S rY   )r   r   r   )r@   r   r   r   r   stage_finished   s   zCompileTimer.stage_finishedknobs.CompileTimesc                 C  s~   t   }| jd u r|| _n|| _d
dd}g }| j}| jD ]\}}|||||f |}qtj|| j| j|||| jd	S )Nr   floatendfloat | Noner!   intc                 S  s   |d u rdS t ||  d S )Nr   i@B )r   )r   r   r   r   r   delta   s   zCompileTimer.end.<locals>.delta)Zir_initializationZlowering_stagesZstore_results)r   r   r   r   r!   r   )r   r   r   r   r   r   ZCompileTimesr   )r@   	timestampr   Zlowering_stage_durationsZstage_startr   Z	stage_endr   r   r   r      s   


zCompileTimer.endNr]   )r   r7   r!   r"   )r!   r   )r0   r_   r`   rA   r   r   r   r   r   r   r   r      s
    


r   c           '      C  sf  t jj}|r	t }|d u rtj }t|tsJ dt	|}t| t
 }|r:t| ts0J dt }t| ||} |  }|t|pEt fi |}t }	t  d|   d|  d|  dtt|	  	}
t|
d }t|}t jj}t jj}t jj}|rt|  nd }|rt|  nd }| j d d }| d}|!|pi }|"|}t jj#}|s|d urt$| ||}|r|| |j%& ||' dd |S ||d	|j(|	}t)|d
< t }|*||| j+ t,|- .| j/}|r|d7 }t| tst }t0| |0| |1|}|2 }z
| 3||||}W n t4y6 } zt5|  d }~ww |rK| d| j/ }|6||||< n| d}|6||||< t jj7} |rp| rp|8| j9 t:d| j9  |rw|;  t,| |d  D ]\}!}"|"||}#| d|! }|d u r|"dd  }$r|$<d|! rt=|$|!|}#n|>| }%rt:d|%  t=|%|!|}#|r|!dv r|6|#|||< |d ur|6|#| | |!kr|>|}&|#8|& t:d|&  |#}|r|?|! q|j6t@jA|tBd|dd||< |C|| t jjDs|E  |r-|| |||' dd t$| ||S )Nz target must be of GPUTarget typez'source must be either AST or a filepathrG   rJ      .jsonT)rh   metadatametadata_grouptimesZ	cache_hit)rR   targetZtriton_versionr   ry   z.sourcezCreating new locations for ir_overridez
Overriding kernel with file )r   r   json)defaultF)binary)Fr   r   Zlistenerr   r   r   get_current_targetr6   r	   make_backendr    r7   r   rT   ra   r[   r3   r   r   rR   rK   r5   rM   rN   rO   rP   r   overrideZdump_irZstore_binary_onlyr   r   r1   Z	get_groupgetalways_compileCompiledKernelr   _asdictr   __dict__r
   Z
add_stagesr.   listr>   r9   r/   ri   Zget_codegen_implementationZget_module_maprX   	Exceptionr   put
use_ir_locZcreate_location_snapshotre   printr   r   r   Zget_filer   r   dumpsvarsZ	put_groupZenable_asanZdisable_multithreading)'rh   r   rU   Zcompilation_listenerZtimerrp   Z	ir_sourcerT   Zextra_optionsZenv_varsrQ   rR   Zfn_cache_managerZenable_overrideZenable_ir_dumpZstore_only_binaryZfn_override_managerZfn_dump_manager	file_nameZmetadata_filenamer   metadata_pathr   resr   ZstagesZfirst_stagerV   rW   ro   r   Zir_filenamer   r/   Z
compile_irZnext_moduler   r   Zir_full_namer   r   r   compile  s   
:







$







r   r   r	   r!   r   c                   sN    fddt  D }t|dkr!tt| d j d| d|d  S )Nc                   s   g | ]}|j  r|j qS r   )rv   Zsupports_target)r&   r   r   r   r   rB     s    z make_backend.<locals>.<listcomp>r   z! compatible backends for target (z) (z). There should only be one.r   )r   valueslenRuntimeErrorrp   )r   Zactivesr   r   r   r     s   r   c                   @  s$   e Zd Zdd Zdd Zdd ZdS )LazyDictc                 C  s   || _ g | _d S rY   )dataextras)r@   r   r   r   r   rA     s   
zLazyDict.__init__c                 C  s0   | j D ]\}}| j|| B | _q| j   | jS rY   )r   r   clearr@   funcargsr   r   r   r     s   
zLazyDict.getc                 C  s   | j ||f d S rY   )r   r   r   r   r   r   add  s   zLazyDict.addN)r0   r_   r`   rA   r   r   r   r   r   r   r     s    r   c                   @  s   e Zd Zdd ZdS )AsmDictc                 C  s.   |dkrt | d }ntd| || |< |S )Nsassr   zUnknown key: '%s')r   KeyError)r@   rQ   valuer   r   r   __missing__  s
   zAsmDict.__missing__N)r0   r_   r`   r   r   r   r   r   r     s    r   c                      s<   e Zd Zdd Zdd Z fddZdd Zd	d
 Z  ZS )r   c                   s  ddl m} tdd | D }t| }t|d |d< |d }t|d |d |d	 |d< |d
t	t
| }|di || _t| jj}	|	| j| _|| _|| _| jj| _dd | D }
|	j t fdd|
D | _| j  | _d | _d | _d S )Nr   )
namedtuplec                 s  s&    | ]\}}| d rt|V  qdS )r   Nr   r   r&   cpr   r   r   r     s   $ z*CompiledKernel.__init__.<locals>.<genexpr>Zcluster_dimsr   rp   arch	warp_sizeKernelMetadatac                 S  s"   g | ]\}}| d st|qS )r   r   r   r   r   r   rB     s   " z+CompiledKernel.__init__.<locals>.<listcomp>c                   s:   i | ]}|j d d |j d d  kr| n| qS )r   N)rf   r   rg   )r&   file
binary_extr   r   r)     s    ,z+CompiledKernel.__init__.<locals>.<dictcomp>r   )collectionsr   nextr5   r   loadsrg   r:   r	   rK   r   r>   r   r   r   Zpack_metadatapacked_metadatarh   rR   r1   r   r   asmkernelro   function)r@   rh   r   rR   r   r   r   r   r   rp   Z	asm_filesr   r   r   rA     s*   


zCompiledKernel.__init__c                 C  s   | j d urd S tj }tj| j| j| _t|}| jj	|kr(t
| jj	|dt| jdrD| jjd urDd}| jj|krDt
| jj|dtjj| j| j| jj	|\| _ | _| _| _| _tj j}| jj| | jkrvt
| jj| | jdd S )Nzshared memory	tmem_sizei   ztensor memorythreads)ro   r   r   get_current_deviceZlauncher_clsrh   r   runr   Zsharedr   rD   r   r   Zload_binaryr1   r   r   Zn_regsZn_spillsZn_max_threadsr   r   rt   )r@   r   Z
max_sharedZmax_tmem_sizer   r   r   r   _init_handles  s$   

zCompiledKernel._init_handlesc                   s   |dkr|    t |S )Nr   )r  super__getattribute__)r@   r1   	__class__r   r   r    s   zCompiledKernel.__getattribute__c           	      G  s   t jjd u rd S t| j| j|d}t| jtr| jj	j
d u r!|S i }d}t| jj	jD ]\}}|| ||< |d7 }q,|| jj	j
|| j|f |S )N)r1   r   streamr   r   )r   runtimelaunch_enter_hookr   r1   r   r6   rh   r    r,   launch_metadatar<   r8   r   r   )	r@   gridr  r   retZarg_dictZarg_idxiZarg_namer   r   r   r	    s   
zCompiledKernel.launch_metadatac                   s       d d fdd
}|S )N)r  c              
     sp   | d u rt j }t j|} j | g|R  }j d  d  d | jj|tj	j
tj	jg	|R   d S )Nr   r   r   )r   r   r   Zget_current_streamr	  r   r   r   r   r  r  Zlaunch_exit_hook)r  r   r   r	  r
  r@   r   r   runner  s   
"z*CompiledKernel.__getitem__.<locals>.runner)r  )r@   r
  r  r   r  r   __getitem__  s   zCompiledKernel.__getitem__)	r0   r_   r`   rA   r  r  r	  r  __classcell__r   r   r  r   r     s    r   )r   r   r\   )r   r	   r!   r   )3
__future__r   rM   r   Z_C.libtritonr   r   r   Zbackends.compilerr   r   r	   r   r
   r   Zruntime.autotunerr   Zruntime.cacher   r   r   Zruntime.driverr   Ztools.disasmr   pathlibr   r   	functoolsr|   r   r   Zptx_prototype_patternrj   Zptx_arg_type_patternrm   r   r    ra   	lru_cacher   r   r   r   r   r   r   r   r3   r   r   r   r   r   r   <module>   sP    
&*
#

&
' 
