o
    h                  	   @  s  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  d	dl!m"Z" d	dl#m$Z$m%Z%m&Z&m'Z' e(de)d  Z*edZ+G dd dej,Z-dCddZ.G dd dZ/i Z0g Z1dd Z2dDddZ3G dd dee+ Z4dd Z5d d! Z6d"d# Z7eG d$d% d%Z8G d&d' d'e4e+ Z9edEd*d+Z:edddddddd,dFd7d+Z:	dGdddddddd,dHd:d+Z:G d;d< d<Z;G d=d> d>Z<d?d@ Z=dAdB Z>dS )I    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs)driver)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtypez.runtime.jitTc                      s   e Zd ZdZd fddZedd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Z  ZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                   sH   t    || _t|d| _|| _|| _h d| _	i | _
d| _d S )Nutf-8>
   minmaxrangelenlistfloatint
isinstanceprintgetattrF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr,   r1   r2   src	__class__ X/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/triton/runtime/jit.pyr+   )   s   


zDependenciesFinder.__init__c                 C  
   | j  S N)r0   	hexdigestr6   r:   r:   r;   retN      
zDependenciesFinder.retc                 C  s&   t |jrdS t|dd}|tS )NT
__module__ )inspect	isbuiltinfuncr)   
startswithTRITON_MODULE)r6   noderF   moduler:   r:   r;   _is_triton_builtinR   s   
z%DependenciesFinder._is_triton_builtinc                 C  s   t |tr]| j |j @ D ].}|\}}| j| \}}|j| \}}||kr=td| d| d| j d|j d| dq| j|j |j}|t	t
|dd7 }| j|d	 d S d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )r'   JITFunctionr4   keysRuntimeErrorr,   __name__update	cache_keystrr)   r0   r/   )r6   rF   kvar_name_Zv1Zv2Zfunc_keyr:   r:   r;   _update_hashX   s   
&zDependenciesFinder._update_hashc                   s   t |jtju r|jS |j jv rd S  fdd}||j\}}|d urM jsMt |turMt|t	sMt
|ddsM|j jvrMt||f j|jt|f<  | |S )Nc                   sD    j | d }|d ur| j fS  j| d }|d ur | jfS dS )N)NN)r1   getr2   )r,   valr?   r:   r;   name_lookupr   s   

z2DependenciesFinder.visit_Name.<locals>.name_lookupZ__triton_builtin__F)typectxastStoreidlocal_namesr5   r   r'   rN   r)   r3   copyr4   rX   )r6   rI   r[   rZ   Zvar_dictr:   r?   r;   
visit_Namej   s(   	





zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS r:   )visit).0eltr?   r:   r;   
<listcomp>       z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr6   rI   r:   r?   r;   visit_Tuple   s   zDependenciesFinder.visit_Tuplec                 C  sf   |  |j}t|tjr|  |j}t|tjs|d u s$t|ddtkr&d S t||j}| | |S )NrQ   rC   )	rd   valuer'   r^   	Attributer)   rH   attrrX   )r6   rI   lhsr@   r:   r:   r;   visit_Attribute   s   
z"DependenciesFinder.visit_Attributec                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]}|j qS r:   arg)re   rr   r:   r:   r;   	<setcomp>       z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)argsra   generic_visitrj   r:   r:   r;   visit_FunctionDef   s   z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr|jgng |jD ]} | q||j |jd ur0 |j ||j	 d S )Nc                   sB   z j rJ d _ | D ]}|d ur | qW d _ d S d _ w )NTF)r5   rd   )defaultsexprr?   r:   r;   visit_defaults   s   

z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
	itertoolschainposonlyargsru   vararg
kwonlyargsrd   kw_defaultskwargrx   )r6   rI   rz   rr   r:   r?   r;   visit_arguments   s   (


z"DependenciesFinder.visit_argumentsc                 C  s:   |  |}t|tr|  jt|O  _d S | j| d S r=   )rd   r'   r$   ra   setadd)r6   rI   targetr:   r:   r;   visitAssnTarget   s   

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )N   z2Simultaneous multiple assignment is not supported.r   )r#   targets	TypeErrorr   rv   rj   r:   r:   r;   visit_Assign   s   zDependenciesFinder.visit_Assignc                 C     |  |j | | d S r=   r   r   rv   rj   r:   r:   r;   visit_AnnAssign      z"DependenciesFinder.visit_AnnAssignc                 C  r   r=   r   rj   r:   r:   r;   	visit_For   r   zDependenciesFinder.visit_For)r   r   )rQ   rB   __qualname____doc__r+   propertyr@   rK   rX   rc   rk   rp   rw   r   r   r   r   r   __classcell__r:   r:   r8   r;   r      s     %
'
 	r   r   rT   c                 C  s  dd l m  m} t| trZ|  } | dr/| d} t| } | ds'J d| dd   S | 	dr>dt| d d  S | drMdt| dd   S | drYt| dS n%t| |j
rhdt| j S t| |jrr| j} nt| tr{| j} nt| } t| d	d
| S )Nr   zconst const**kr   ztl.Z_trC   )triton.language.corelanguagecorer'   rT   striprG   removeprefix_normalize_tyendswithZpointer_typeZ
element_tydtyper,   r\   rQ   r   rY   replace)tyr   r:   r:   r;   r      s.   






r   c                   @  sr   e Zd ZdZdd	d
Zedd ZedddZedddZedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr&   paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                 C  s   || _ || _|| _|| _d S r=   )r   _paramr   r   )r6   r   r   r   r   r:   r:   r;   r+   
  s   
zKernelParam.__init__c                 C     | j jS r=   )r   r,   r?   r:   r:   r;   r,        zKernelParam.namer   rT   c                 C  s(   | j jr| j jtjjkrdS t| j jS )NrC   )r   
annotationrD   	Parameteremptyr   r?   r:   r:   r;   r     s   zKernelParam.annotationc                 C  sN   | j }|dr|dd  }n|dr|dd  }|tt v r%| j S dS )Nr   r   r   r   rC   )r   rG   r   r   values)r6   ar:   r:   r;   annotation_type  s   

zKernelParam.annotation_typec                 C  s
   d| j v S N	constexpr)r   r?   r:   r:   r;   is_constexpr&  rA   zKernelParam.is_constexprc                 C  s    | j rdS d| jv p| jdS )NFr   r   )r   r   rG   r?   r:   r:   r;   is_const*  s   zKernelParam.is_constc                 C  r   r=   )r   defaultr?   r:   r:   r;   r   0  r   zKernelParam.defaultc                 C  s   | j jtjjkS r=   )r   r   rD   r   r   r?   r:   r:   r;   has_default4  s   zKernelParam.has_defaultN)r   r&   r   r   r   r   r   r   r   rT   )rQ   rB   r   r   r+   r   r,   r   r   r   r   r   r   r   r:   r:   r:   r;   r     s"    





r   c                   s0   ddl m ddlm  d	 fdd	S )
Nr   r   r   r   FTc                   s   d u rdS t  trdS t  trA|r d|dnd } dkr%|r%dS d kr1 dkr1d	|fS d
 kr= dkr=d|fS d|fS t  trHdS t dr} j|f}t|d }|d u rn|d rbdndt|d  }|t|< |rw d|dnd }||fS t  t	rd j
fS t  rd fS t drdS t  trfdd D } fdd}|dd |D }	|dd |D }
|	|
fS t  trt jdsJ t jj}d| t j dd fS t  rt jdsJ t jj}d| t j d jdd fS td t  )!N)r   N)u1Nr&   )alignr   )r   r   i   iZi32l            l    Zu64Zi64)Zfp32Ndata_ptrr   r   r   tensorr   Ztma_desc_cpu_ptr)Z	nvTmaDescNc                   s   g | ]} |qS r:   r:   re   x)specialize_implr:   r;   rg   c      zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>c                   s   t  drt |  S t| S )N_fields)hasattrr\   tuple)valsrq   r:   r;   <lambda>d      zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>c                 S     g | ]}|d  qS r   r:   r   r:   r:   r;   rg   e  r   c                 S  r   r   r:   r   r:   r:   r;   rg   f  r   ztensordesc<>,zUnsupported type: %s)r'   r   r&   r%   r   r   	dtype2strrY   r   rN   rS   r   r   baser$   Zblock_shapeZlayoutr   r\   )rr   r   specialize_valuer   keyZdskresspecZ
make_tupleZtysrO   innerZGluonTensorDescriptorr   specialize_extrar   rq   r;   r   B  sX   










"z/create_specialize_impl.<locals>.specialize_impl)FTT)r   r   Z'triton.experimental.gluon.nvidia.hopperr   )r   r:   r   r;   create_specialize_impl=  s   1r   Fc                 C  s6   t tdkrttdd  td }|| |dd S )Nr   c                 [     d S r=   r:   )rW   kwargsr:   r:   r;   r   x  s    zmangle_type.<locals>.<lambda>)r   )r#   specialize_impl_cacheappendr   )rr   
specializer   r:   r:   r;   mangle_typev  s   r   c                   @  s    e Zd ZU ded< dddZdS )KernelInterfacer   runr   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)r   )ru   r   r   r6   r:   r;   r     rh   z-KernelInterface.__getitem__.<locals>.<lambda>r:   )r6   r   r:   r   r;   __getitem__  s   zKernelInterface.__getitem__N)r   r   )rQ   rB   r   __annotations__r   r:   r:   r:   r;   r   }  s   
 r   c           	   	   C  sl   dd |  D }dd l}| |dd | D t| dd | D t| |j|d}||}|S )Nc                 S  s*   i | ]\}}||j jd krt|n|qS r   )r9   rQ   rT   re   r   rl   r:   r:   r;   
<dictcomp>  s   * z1serialize_specialization_data.<locals>.<dictcomp>r   c                 S     g | ]}t |qS r:   r$   r   r:   r:   r;   rg     r   z1serialize_specialization_data.<locals>.<listcomp>c                 S  r   r:   r   r   r:   r:   r;   rg     r   )r,   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )itemsjsonrO   r$   r   __dict__dumps)	r,   r   	constantsattrsr   r   r   objZserialized_objr:   r:   r;   serialize_specialization_data  s   $
r   c              
   C  s  t | jt |ksJ g }t| j |D ]o\}}|jr&|d| d q|jr+dnd}|jr2dnd}|jr9dnd}d| d| d| d| d	}	|j	r~t
|j	trc|j	dksa|j	dd	 d
v rcd}|rs|d|j	 d|	 d q|d|j	 d q||	  qdd }
ddtt|
| j dg  dddd | j D  dd| d}dd | j D }t|d< t|j|d< t|| |d S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                 S  s0   | d j tjju r| d S | d  d| d  S )Nr   r   z	=default_r   rD   r   r   )r   r:   r:   r;   r     s   0 z0create_function_from_signature.<locals>.<lambda>z
def dynamic_func(z	**optionsz):
    params = {c                 S  s   g | ]
}d | d| qS )'z': r:   )re   r,   r:   r:   r;   rg     s    z2create_function_from_signature.<locals>.<listcomp>z}
    specialization = [r   z-]
    return params, specialization, options
c                 S  s,   i | ]\}}|j tjjurd | |j qS )Zdefault_r   )re   r,   r   r:   r:   r;   r     s    z2create_function_from_signature.<locals>.<dictcomp>rN   r   Zdynamic_func)r#   
parametersziprO   r   r   r   r   r   r   r'   rT   joinr$   mapr   rN   r   Zget_arg_specializationexec)sigZkparamsbackendspecializationr,   kpr   r   r   r@   rr   Z	func_bodyZfunc_namespacer:   r:   r;   create_function_from_signature  s@   
r	  c                 C  s   | j  d| j S )N.)rB   r   fnr:   r:   r;   get_full_name     r  c                   @  s&   e Zd ZU ded< ded< ded< dS )JitFunctionInfor   rJ   rT   r,   rN   Zjit_functionN)rQ   rB   r   r   r:   r:   r:   r;   r    s   
 r  c                      s   e Zd Zdd Zd&ddZdd Zd	d
 Zdd Zdd Z		d'ddZ	dd Z
edd Zedd Zdd Zdd Zdd Zdd Z fd d!Z fd"d#Zd$d% Z  ZS )(rN   c                 C     dS )NFr:   r?   r:   r:   r;   is_gluon  s   zJITFunction.is_gluonr   bool | Nonec	                 C  s   |sd S | j j}	| j j}
ddd t| j|d D }|	 d|j d|j d|j d|j	 d	|j
 d
| d}t| j }t||||d ||}||||j|j|j|j	|j
|j|||d}|||t|
|	| d|i||ddS )Nr   c                 S  s    g | ]\}}|j  d | qS )z: r,   )re   r   r   r:   r:   r;   rg          z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r   r   )r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr  compileZis_manual_warmupZalready_compiled)r  r   rB   r  r  paramsr  r  r  r  r  r  r   r  r  )r6   hookr   r   r  r   r   r  r  r,   rJ   Z	arg_reprsr   Z	full_namer  r   r:   r:   r;   
_call_hook  s:    8


zJITFunction._call_hookc                 C  s   t |sJ | j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr   )r6   r#  r:   r:   r;   add_pre_run_hook  s   zJITFunction.add_pre_run_hookc                 C  sX   ddl m}m}m}m} tj }||}|| _|| _|| _t| j	| j
|}i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelr!  	ASTSourcemake_backend)compilerr(  r!  r)  r*  r   activeZget_current_targetr	  r   r"  )r6   r(  r!  r)  r*  r   r  binderr:   r:   r;   create_binder  s   
zJITFunction.create_binderc          !   
     s  | d| jp
tjj|d< tj }tj|}| jD ]	}||i | q| j	| \}}	}
|
|i |\}}t
|t
| }| |d }|d u r|}dd | jD }dd |D }dd t||D }d|vspJ dd	|vsxJ d
d|vsJ d|D ]}||jvr||vrtd| qt|dd }fdd|D }dd |D  t dd } fdd|D }| tjj||||||g|rd S | | |||}| j||	|jd}|||< | tjj||||||g| t }| j D ] \\}}\}}| || }|krtd| d| d| q|sp|d us$J t|r-|}t|}|d }|dkr>|d nd}|dkrI|d nd}|j||g R  } |j|||||j|j | tjj!tjj"g	 R   |S )Ndebugc                 S     g | ]}|j qS r:   r  r   r:   r:   r;   rg   <  rt   z#JITFunction.run.<locals>.<listcomp>c                 S  r   r   r:   r   r:   r:   r;   rg   =  r   c                 S  s   i | ]\}}||qS r:   r:   )re   rU   vr:   r:   r;   r   >  rh   z#JITFunction.run.<locals>.<dictcomp>Zdevice_typez=device_type option is deprecated; current target will be usedr  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                 S  s   |dkS r   r:   )rW   rZ   r:   r:   r;   r   G  s    z!JITFunction.run.<locals>.<lambda>c                   s    i | ]}|t t  |qS r:   )r   r$   r   )re   path)
bound_argsr:   r;   r   H  r  c                 S  r   r   r:   r   r:   r:   r;   rg   J  r   c                 S  s
   t |tS r=   )r'   rT   )rW   r   r:   r:   r;   r   K  s   
 c                   s   i | ]}| t |qS r:   )Z
parse_attrr   )re   rU   )attrvalsr  r:   r;   r   L  r   )r   r   rL   z1 has changed since we compiled this kernel, from z to r   r   r   )#rY   r/  r   runtimer   r,  get_current_deviceZget_current_streamr&  device_cachesrT   Zparse_optionsr"  r  r   KeyErrorr   r$  Zjit_cache_hookr)  r!  Zjit_post_compile_hookobjectr4   r   rP   r%  r#   launch_metadatar   r   functionZpacked_metadataZlaunch_enter_hookZlaunch_exit_hook)!r6   r   r   ru   r   r  r2  r#  Zkernel_cacher   r-  r  r   r   kernelZsigkeysZsigvalsr   rU   
constexprsr   r7   Znot_presentr,   rW   rZ   Zglobals_dictZnewValZ	grid_sizeZgrid_0Zgrid_1Zgrid_2r;  r:   )r5  r  r4  r;   r   #  st   



zJITFunction.runc                 C  s   | j d u r| jS |  |S r=   )_repr_fn_name)r6   rW   r:   r:   r;   r   m  s   zJITFunction.reprNc	                 C  sz  |r|ng }|r
|ng }|| _ |j| _|| _t|| _|| _|| _t|d | _	|| _
t|| _|| _g | _t| jj D ]!\}	}
|	|v pL|
j|v }|	|v pU|
j|v }| jt|	|
|| q@tt|}|td|tj d  }| | t| j| _d | _ i | _!d | _"|| _#|| _$dd | jD | _%dd | jD | _&g | _'|j(| _(|j)| _)|j*| _*|j+| _+|j| _d S )Nr   z^def\s+\w+\s*\(c                 S  r0  r:   r  re   pr:   r:   r;   rg     rt   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS r:   )r   r   rA  r:   r:   r;   rg     s    ),r  rB   rJ   versionrD   r   r   r   getsourcelinesstarting_line_numberr?  r  r@  r;  r"  	enumerater   r   r,   r   r   textwrapdedent	getsourceresearch	MULTILINEstart_unsafe_update_srcr   r.  r8  hashr4   r=  r/  rM   	arg_namesr>  r&  r   rQ   r   __globals__)r6   r  rC  r   r   r/  rM   r   r;  ir   ZdnsZdns_oar7   r:   r:   r;   r+   p  sD   

zJITFunction.__init__c                 C  s   | j t| jjB S r=   )rQ  rD   getclosurevarsr  r2   r?   r:   r:   r;   get_capture_scope     zJITFunction.get_capture_scopec                 C  sh   | j d u r1t| jj}t| j| j|| jd}|	| 
  |jt| j | _ tt|j | _| j S )N)r,   r1   r2   r7   )rO  rD   rS  r  r2   r   r@  rQ  r7   rd   parser@   rT   rE  dictsortedr4   r   )r6   r2   Zdependencies_finderr:   r:   r;   rS     s   
zJITFunction.cache_keyc                 C  s   ddl m} |S )Nr   r   )r   r   )r6   r   r:   r:   r;   r\     s   zJITFunction.typec                O  s   | j ttj||dd|S )NTr   )r   r  
MockTensor
wrap_dtype)r6   r   ru   r   r:   r:   r;   r     s   zJITFunction.warmupc                   s  ddl m}m} dd l}dd lm  tj }|	|}|d | j
kr0td|d  d| j
 tt|d }|d } fd	d
t||D }	tt|d }
|d }tt|
|}t|d  }|| ||	|}dd
 |d  D }|d }||d |}|| j| d |< |S )Nr   )r!  r)  r   r,   zSpecialization data is for z but trying to preload for r   r   c                   s,   i | ]\}}| j |r  |n|qS r:   )r   Zis_dtyper   tlr:   r;   r     s    z'JITFunction.preload.<locals>.<dictcomp>r   r   r   c                 S  s(   i | ]\}}|t |trt|n|qS r:   )r'   r$   r   r   r:   r:   r;   r     s    r   r   )r+  r!  r)  r   Ztriton.languager   r   r,  r7  loadsr@  rP   r  r   r  rW  r   r8  )r6   r  r!  r)  r   r  Zdeserialized_objr   r   r   r   r   r   r   r7   r   r   r=  r:   r[  r;   preload  s4   



zJITFunction.preloadc                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )r^   rV  r7   r'   Moduler#   bodyFunctionDef)r6   treer:   r:   r;   rV    s
   zJITFunction.parsec                 O  s   t d)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rP   )r6   ru   r   r:   r:   r;   __call__  s   zJITFunction.__call__c                   s.   |dkrt d| dtt| || d S )Nr7   zCannot set attribute 'zX' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorr*   rN   __setattr__)r6   r,   rl   r8   r:   r;   re    s   zJITFunction.__setattr__c                   s   d| _ t d| dS )z
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.
        Nr7   )rO  r*   re  )r6   Znew_srcr8   r:   r;   rN    s   zJITFunction._unsafe_update_srcc                 C  s   d| j  d| jj dS )NzJITFunction(:r   )rJ   r  r   r?   r:   r:   r;   __repr__  s   zJITFunction.__repr__)r   r  )NNNNNNN)rQ   rB   r   r  r$  r'  r.  r   r   r+   rT  r   rS   r\   r   r^  rV  rc  re  rN  rg  r   r:   r:   r8   r;   rN     s,    
.J
>

 rN   r  JITFunction[T]c                 C  r   r=   r:   r  r:   r:   r;   jit     ri  rC  r   r;  r   r   r/  rM   r   Optional[Callable]r;  r   Optional[Iterable[int | str]]r   r/  Optional[bool]rM   Callable[[T], JITFunction[T]]c                 C  r   r=   r:   rk  r:   r:   r;   ri    s   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c          	        s.   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r  r   r   rh  c              
     sP   t | sJ tjjrddlm} ||  dS t|  dS )Nr   )InterpretedFunction)rC  r   r   r/  rM   r   r;  )r%  r   r6  Z	interpretinterpreterrr  rN   )r  rr  r/  r   r   r;  rM   r   rC  r:   r;   	decorator8  s"   zjit.<locals>.decoratorNr  r   r   rh  r:   )	r  rC  r   r;  r   r   r/  rM   ru  r:   rt  r;   ri    s   c                   @  s<   e Zd ZdZedd Zdd Zedd Zedd	 Zd
S )rY  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   Ztorch)r9   rQ   rB   rY  rq   r:   r:   r;   rZ  ]  s   zMockTensor.wrap_dtypec                 C  s
   || _ d S r=   r   )r6   r   r:   r:   r;   r+   c     
zMockTensor.__init__c                   C  r  Nr   r:   r:   r:   r:   r;   r   f  rj  zMockTensor.data_ptrc                   C  r  rx  r:   r:   r:   r:   r;   	ptr_rangej  rj  zMockTensor.ptr_rangeN)	rQ   rB   r   r   staticmethodrZ  r+   r   ry  r:   r:   r:   r;   rY  W  s    

rY  c                   @  s^   e Zd Zdd Zdd Zdd Zdd	d
Zdd Zdd Zdd Z	dd Z
dd Zdd ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r=   )r   r   datar  shape)r6   r   r   r:   r:   r;   r+   q  s
   zTensorWrapper.__init__c                 C  r<   r=   )r   r   r?   r:   r:   r;   r   x  rw  zTensorWrapper.data_ptrc                 G  s   | j j| S r=   )r   stride)r6   ru   r:   r:   r;   r~  {  s   zTensorWrapper.strider   rT   c                 C  s   d| j  d| j dS )NzTensorWrapper[r  r   )r   r   r?   r:   r:   r;   __str__~  s   zTensorWrapper.__str__c                 C  r<   r=   )r   element_sizer?   r:   r:   r;   r    rw  zTensorWrapper.element_sizec                 C     t | j | jS r=   )r{  r   cpur   r?   r:   r:   r;   r    r  zTensorWrapper.cpuc                 C  s   | j |j  d S r=   )r   copy_)r6   otherr:   r:   r;   r    r  zTensorWrapper.copy_c                 C  r  r=   )r{  r   cloner   r?   r:   r:   r;   r    r  zTensorWrapper.clonec                 C     t | j|| jS r=   )r{  r   tor   )r6   r  r:   r:   r;   r    rU  zTensorWrapper.toc                 C  r  r=   )r{  r   	new_emptyr   )r6   sizesr:   r:   r;   r    rU  zTensorWrapper.new_emptyNr   )rQ   rB   r   r+   r   r~  r  r  r  r  r  r  r  r:   r:   r:   r;   r{  o  s    
r{  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr   zCannot reinterpret a r
  )r'   r{  r   r   r   r   r\   )r   r   r:   r:   r;   reinterpret  s   


r  c                 C  sr   | }t |ts|j}t |tr|jjj}t|j\}}t|D ]\}}| 	dr4||7 } ||fS q ||fS )Nzdef )
r'   rN   r  __code__co_filenamerD   rD  rF  r   rG   )r  Zbase_fn	file_namelinesZ
begin_lineidxliner:   r:   r;   get_jit_fn_file_line  s   


r  r   )Frv  )r   rl  r;  rl  r   rm  r   rm  r/  rn  rM   rn  r   ro  r=   )r  rp  r   rl  r;  rl  r   rm  r   rm  r/  rn  rM   rn  r   rq  )?
__future__r   r   r^   rb   r-   rD   r{   rJ  rG  collectionsr   dataclassesr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   Ztriton.tools.tensor_descriptorr   typesr   rC   r   Zruntime.driverr   Z_utilsr   r   r   r   rQ   r#   rH   r   NodeVisitorr   r   r   r   r   r   r   r   r   r	  r  r  rN   ri  rY  r{  r  r  r:   r:   r:   r;   <module>   sz    0 
Q2
9:  /<%