o
    hz                    @  s   d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ ed	Zed
ZG dd deZG dd de	e ZdS )    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                      s   e Zd Z fddZ  ZS )IncompatibleTypeErrorImplc                   s@   || _ || _d| j   d | j  | _tt| | j d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__ ^/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/triton/language/semantic.pyr      s   z"IncompatibleTypeErrorImpl.__init__)__name__
__module____qualname__r   __classcell__r   r   r   r   r      s    r   c                   @  s  e Zd ZU ejZded< eZded< dd ZdSddZdSddZ	dTddZ
dUddZdVdWddZdXd"d#Z		$dYdZd)d*Zd[d-d.Zd\d2d3Zd\d4d5Zd\d6d7Zd]d8d9Zd]d:d;Zd^d=d>Zd]d?d@Zd_dEdFZd_dGdHZd`dKdLZdadMdNZdbdOdPZdbdQdRZdbdSdTZdbdUdVZdbdWdXZdcdYdZZ dbd[d\Z!dbd]d^Z"dbd_d`Z#dddadbZ$dddcddZ%dddedfZ&dedidjZ'dbdkdlZ(dbdmdnZ)dbdodpZ*dbdqdrZ+dbdsdtZ,dbdudvZ-dwdxdfd|d}Z.dgddZ/dgddZ0dhddZ1diddZ2djddZ3dkddZ4dlddZ5dmddZ6dnddZ7doddZ8dpddZ9dqddZ:drddZ;dsddZ<dtduddZ=dd Z>dd Z?dd Z@dd ZAdd ZBdd ZCdd ZDdd ZEdd ZFdvddǄZGdwdd˄ZHdxdd̈́ZIdyddτZJdyddфZKddӄ ZLddՄ ZMdyddׄZNdyddلZOdyddۄZPdydd݄ZQdydd߄ZRdzddZSd{ddZTdd ZUdd ZVd|ddZWd}ddZXd~ddZYdddZZdddZ[dddZ\dddZ]dddZ^dddZ_dd dZ`dddZadd Zbdd
dZcdddZddddZedddZfdddZgdd Zhdd#d$Zidd&d'Zjdd*d+Zkdd-d.Zldd0d1Zmdd2d3Zndd4d5Zodd6d7Zpdd<d=Zqdd@dAZrddBdCZsdDdE ZtdVdFdGZuddIdJZvddKdLZwddQdRZxdwS (  TritonSemanticzType[TensorTy]tensorz
ir.builderbuilderc                 C  s
   || _ d S N)r$   )r   r$   r   r   r   r      s   
zTritonSemantic.__init__axisintreturnr   c                 C  ,   |dvrt d| | | j|tjS )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr#   r$   Zcreate_get_program_idtlint32r   r&   r   r   r   
program_id&      zTritonSemantic.program_idc                 C  r)   )Nr*   z-num_programs axis must be 0, 1, or 2 but got )r+   r#   r$   Zcreate_get_num_programsr,   r-   r.   r   r   r   num_programs+   r0   zTritonSemantic.num_programsa_tytl.dtypeb_tyc                 C  s   |j }|j }|j}|j}||kr||kr|S |S |tjjjkr'||kr%|S |S |tjjjkr6||kr4|S |S td| d| )Nzunexpected signedness r   )int_bitwidthint_signednessr,   dtypeZ
SIGNEDNESSZUNSIGNED	TypeError)r   r2   r4   Za_rankZb_rankZa_snZb_snr   r   r   integer_promote_impl4   s   z#TritonSemantic.integer_promote_impla_is_scalarboolb_is_scalar
div_or_modc                 C  sV  ||kr)|r
||fn||f\}}|  j|  jkr)|r'|tjtjfv r'tjS |S | s1| r4tjS | s<| r?tjS |	 sG|	 rO|rLtjS tjS |
 r_|
 r_|r\tjS tjS |
 sg|
 rjtjS | r{| r{||krx|S tjS | r| std| d| |r|j|jkrtd|  d |  d | ||S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer,   float16bfloat16float32Zis_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr8   r6   r   r9   )r   r2   r:   r4   r<   r=   	scalar_tyZ	tensor_tyr   r   r   computation_type_implC   s:   z$TritonSemantic.computation_type_implT
check_typec                 C  s  t |tr| | j|tjS t |trdd|  krdk r%n ntj}n8d|  kr/dk r5n ntj	}n(d|  kr?dk rEn ntj
}nd|  krOdk rUn ntj}ntd| d| j||d	S t |trd
}ddd  }td |}|tdks|dks||ks||  kr|krn ntj}ntj}| j||d	S t |tjr| |jS t || jr|S |rtd| dt| d|S )N           l                             l            zNonrepresentable integer .r7   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstancer;   r#   r$   get_int1r,   int1r'   r-   uint32int64uint64r+   scalar_constantfloat__builtins__rD   rE   	constexpr	to_tensorrA   r8   type)r   xrM   r7   Zmin_float32Zmax_float32Zabs_xr   r   r   ra   u   sH   



zTritonSemantic.to_tensorr   r   allow_ptr_aNonec                 C  sJ   |  r!|st|||  r||krt||| r#t||d S d S r%   )is_ptrr   is_floating)r   r   r   rd   r   r   r   check_ptr_type_impl   s   


z"TritonSemantic.check_ptr_type_implFlhsTensorTy | numbers.NumberrhsTuple[TensorTy, TensorTy]c                 C  s  t |tj}t |tj}|r|}	| |}|r|}
| |}|jj}|jj}| ||| | ||| |r| s| s| |||||}|rQ|	dk rQ|	 s[|r_|
dk r_|	 r_t
d| r|r~| |	  krs| ks~n t
d|	 d| |r| |
  kr| ksn t
d|
 d| |r| j|	|dn| ||}|r| j|
|dn| ||}| ||\}}||fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type rS   )rW   numbersNumberra   rb   scalarrh   rf   rL   is_int_unsignedr+   rJ   get_int_min_valueget_int_max_valuer]   castbroadcast_impl_value)r   ri   rk   Zallow_lhs_ptrZallow_rhs_ptrZarithmetic_checkr=   Zlhs_is_scalarZrhs_is_scalarZ
lhs_scalarZ
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tyr   r   r   binary_op_type_checking_impl   sF   

z+TritonSemantic.binary_op_type_checking_impl	binary_opcallablec                 C  s   |j jjdks| jjjsd S |j j}|j j}||ksJ | s"J | |tj	}| |tj	}|||d}|
 }| |tj	}| }| |tj	}| | ||| ||}	d|j d|j }
| |	|
 d S )N@   Fr'   z! overflow detected for operation )rb   ro   r5   r$   optionssanitize_overflowrJ   rs   r,   r[   rr   r]   rq   and_
less_equalgreater_equalr   device_assert)r   ri   rk   ry   ru   rv   retZ	max_valueZ	min_valuecondmsgr   r   r    binary_op_sanitize_overflow_impl   s    z/TritonSemantic.binary_op_sanitize_overflow_implinputotherr}   c                 C  s6  |  ||dd\}}|jj}|jj}| r| rtd| r3| s3||}}|jj}|jj}| rf|j}|j rY|jjdk rY|j	t
j| j}| j|j|d}| | j|j||jS | rx| | j|j|j|jS | r|r| ||| j | | j|j|j|jS td| )NTzcannot add pointers togetherr{   Fr>   )rx   rb   ro   rf   r8   handler7   rp   r5   with_element_tyr,   r[   to_irr$   create_int_castr#   Zcreate_addptrrg   Zcreate_faddrJ   r   addZ
create_add)r   r   r   r}   input_scalar_tyother_scalar_tyZother_handleZi64_tyr   r   r   r      s,   
zTritonSemantic.addc                 C  s   |  ||dd\}}|jj}| r| j|| |ddS | r/| | j	|j
|j
|jS | rK|r=| ||| j | | j|j
|j
|jS td| )NTF)r}   r>   )rx   rb   ro   rf   r   minusrg   r#   r$   Zcreate_fsubr   rJ   r   subZ
create_subr8   r   r   r   r}   rK   r   r   r   r      s   zTritonSemantic.subc                 C  s   |  ||\}}|jj}| r| | j|j|j|jS | r:|r,| 	||| j
 | | j|j|j|jS td| Nr>   )rx   rb   ro   rg   r#   r$   Zcreate_fmulr   rJ   r   mulZ
create_mulr8   r   r   r   r   r     s   zTritonSemantic.mulc                 C  s   |  ||dddd\}}|jj}|jj}| r#| r#| ||}nI| r2| r2| ||}n:| rI| rI| |tj}| |tj}n#| re| re|j|jkr^| ||}ql| ||}nt	d| | 
| j|j|j|jS NFTr>   )rx   rb   ro   rg   rJ   rs   r,   rD   Zfp_mantissa_widthr8   r#   r$   create_fdivr   )r   r   r   r   r   r   r   r   truediv  s    zTritonSemantic.truedivc                 C  s   |  ||dddd\}}|jj}|jj}| rN| rN| ||}| ||}| ||}| r@| | j	|j
|j
|jS | | j|j
|j
|jS td| r   )rx   rb   ro   rJ   r9   rs   is_int_signedr#   r$   Zcreate_sdivr   Zcreate_udivr8   )r   r   r   r   r   ret_tyr   r   r   floordiv7  s   zTritonSemantic.floordivieee_roundingc                 C  s`   |j j}|j j}| r| std| ||dddd\}}| j|j|j}| ||j S )Nz4both operands of fdiv must have floating scalar typeFT)	rb   ro   rg   r8   rx   r$   r   r   r#   )r   r   r   r   r   r   r   r   r   r   fdivE  s   zTritonSemantic.fdivc                 C  s   |  ||dddd\}}|jj}|jj}| r&| | j|j|j|jS | r`|j	|j	kr@t
d|  d |  d | rR| | j|j|j|jS | | j|j|j|jS t
d| )NFTzCannot mod z by r?   r>   )rx   rb   ro   rg   r#   r$   Zcreate_fremr   rJ   r6   r8   r   r   Zcreate_sremZcreate_urem)r   r   r   rK   r   r   r   r   modN  s    zTritonSemantic.modrc   ypropagate_nantl.PropagateNanc                 C     |  ||\}}|j}| r>|tjjkr#| | j|j	|j	|j
S |tjjkr7| | j|j	|j	|j
S td| | rP| | j|j	|j	|j
S | rb| | j|j	|j	|j
S td| NzUnexpected propagate_nan Unexpected dtype )rx   r7   rg   r,   PropagateNanALLr#   r$   Zcreate_minimumfr   rb   NONEZcreate_minnumfr+   r   Zcreate_minsirp   Zcreate_minuir8   r   rc   r   r   r7   r   r   r   minimume     zTritonSemantic.minimumc                 C  r   r   )rx   r7   rg   r,   r   r   r#   r$   Zcreate_maximumfr   rb   r   Zcreate_maxnumfr+   r   Zcreate_maxsirp   Zcreate_maxuir8   r   r   r   r   maximumv  r   zTritonSemantic.maximumminmaxc                 C  sp   |  ||\}}|  ||\}}|  ||\}}|j}| r0| | j|j|j|j||jS td| d)Nr   z(. Only floating point clamp is supported)	rx   r7   rg   r#   r$   Zcreate_clampfr   rb   r8   )r   rc   r   r   r   r7   r   r   r   clamp  s   "zTritonSemantic.clampc                 C  sv   |  ||\}}|jj}|jj}| r| st||| ||}||kr-| ||}||kr7| ||}||fS r%   )rx   rb   ro   rJ   r   r9   rs   )r   r   r   input_sca_tyZother_sca_tyrw   r   r   r   bitwise_op_type_checking_impl  s   
z,TritonSemantic.bitwise_op_type_checking_implc                 C  ,   |  ||\}}| | j|j|j|jS r%   )r   r#   r$   Z
create_andr   rb   r   r   r   r   r   r   r~        zTritonSemantic.and_c                 C  r   r%   )r   r#   r$   Z	create_orr   rb   r   r   r   r   or_  r   zTritonSemantic.or_c                 C  r   r%   )r   r#   r$   Z
create_xorr   rb   r   r   r   r   xor_  r   zTritonSemantic.xor_c                 C  <   |j  s| |tj}|j  s| |tj}| ||S r%   )rb   is_int1bitcastr,   rY   r~   r   r   r   r   logical_and  
   

zTritonSemantic.logical_andc                 C  r   r%   )rb   r   r   r,   rY   r   r   r   r   r   
logical_or  r   zTritonSemantic.logical_orc                 C  s"   |j  s| |tj}| |S r%   )rb   r   r   r,   rY   invertr   r   r   r   r   not_  s   

zTritonSemantic.not_c                 C  r   r%   )r   r#   r$   Zcreate_lshrr   rb   r   r   r   r   lshr  r   zTritonSemantic.lshrc                 C  r   r%   )r   r#   r$   Zcreate_ashrr   rb   r   r   r   r   ashr  r   zTritonSemantic.ashrc                 C  r   r%   )r   r#   r$   Z
create_shlr   rb   r   r   r   r   shl  r   zTritonSemantic.shlc                 C  s   |S r%   r   r   r   r   r   plus  s   zTritonSemantic.plusc                 C  sN   |j j}| rtd|  d | | j|| j|}| 	||dS )Nz$wrong type argument to unary minus ()T)
rb   ro   rf   r+   r   r#   r$   get_null_valuer   r   )r   r   r   _0r   r   r   r     s
   zTritonSemantic.minusc                 C  sT   |j j}| s| rtd|  d | | j|	| j|}| 
||S )Nz%wrong type argument to unary invert (r   )rb   ro   rf   rg   r+   r   r#   r$   Zget_all_ones_valuer   r   )r   r   r   Z_1r   r   r   r     s
   zTritonSemantic.invertvtl.block_typec                 C  s   |j tjS r%   )rb   r   r,   rY   )r   r   r   r   r   
_bool_like  s   zTritonSemantic._bool_likec                 C     |  ||\}}|jj}| r | | j|j|j| |S |	 rH|
 r8| | j|j|j| |S | | j|j|j| |S td| r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpOGTr   r   rJ   r   Zcreate_icmpSGTZcreate_icmpUGTr8   r   r   r   rK   r   r   r   greater_than        zTritonSemantic.greater_thanc                 C  r   r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpOGEr   r   rJ   r   Zcreate_icmpSGEZcreate_icmpUGEr8   r   r   r   r   r     r   zTritonSemantic.greater_equalc                 C  r   r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpOLTr   r   rJ   r   Zcreate_icmpSLTZcreate_icmpULTr8   r   r   r   r   	less_than  r   zTritonSemantic.less_thanc                 C  r   r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpOLEr   r   rJ   r   Zcreate_icmpSLEZcreate_icmpULEr8   r   r   r   r   r     r   zTritonSemantic.less_equalc                 C  v   |  ||\}}|jj}| r | | j|j|j| |S |	 r4| | j
|j|j| |S td| r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpOEQr   r   rJ   Zcreate_icmpEQr8   r   r   r   r   equal"       zTritonSemantic.equalc                 C  r   r   )rx   rb   ro   rg   r#   r$   Zcreate_fcmpUNEr   r   rJ   Zcreate_icmpNEr8   r   r   r   r   	not_equal-  r   zTritonSemantic.not_equalN)r   startendr   c          	      C  s   t |tr
t |tstdt|d? }t|d? }|s|r"td||kr*td|| }||d @ dkr:td|g}|d u rHttj|}|| j}| 	| j
||||S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)rW   r'   r+   r;   r,   
block_typer-   r   r$   r#   Zcreate_make_range)	r   r   r   r   Zis_start_int64Zis_end_int64rangeshapeZ	ret_ty_irr   r   r   arange<  s    zTritonSemantic.aranger7   c                 C  sV   |d u rt d|dkr| j|| j}nt| jd|j }||}| ||S )Nz2dtype must be specified when value is not a tensorr   get_)r+   r$   r   r   getattrnamer#   )r   rA   r7   Zget_value_fnr   r   r   r]   N  s   zTritonSemantic.scalar_constantc                 C  s8   t |tjr|jjdksJ d| ||S | ||S )Nr   zonly accepts size-1 tensor)rW   r,   r#   numelrA   rs   r]   )r   rA   r7   r   r   r   make_scalarY  s   zTritonSemantic.make_scalarr   	List[int]c                 C  s   |  | |||S r%   )splatr   )r   r   rA   r7   r   r   r   full`     zTritonSemantic.fullrA   c                 C  sP   |j  r	J dt|dkr|S t|j|}| | j|	| j|j
|S )NzCannot splat a block tensorr   )rb   is_blocklenr,   r   r7   r#   r$   create_splatr   r   )r   rA   r   r   r   r   r   r   g  s
    zTritonSemantic.splat	dst_shapecan_reorderc                 C  sT   d}|D ]}||9 }q|j j|krtdt|j j|}| | j|j	|||S )Nr   z:reshape() cannot change total number of elements in tensor)
rb   r   r+   r,   r   ro   r#   r$   Zcreate_reshaper   )r   r   r   r   r   sr   r   r   r   reshapen  s   
zTritonSemantic.reshapec                 C  s\   dd |j D }||d |j s| j||dS t|jj|}| | j	
|j||S )Nc                 S  s   g | ]}t |qS r   r,   _unwrap_if_constexpr.0rc   r   r   r   
<listcomp>x      z.TritonSemantic.expand_dims.<locals>.<listcomp>r   r   )r   insertrb   r   r   r,   r   ro   r#   r$   create_expand_dimsr   )r   r   r&   r   r   r   r   r   expand_dimsw  s   
zTritonSemantic.expand_dimsc                 C  sZ   |sJ dt |jdksJ t|jj|jd |jd  g}| | j|j	|j	|S )Nz;current implementation of `cat` always may reorder elementsr   r   )
r   r   r,   r   rb   ro   r#   r$   Z
create_catr   )r   ri   rk   r   ret_typer   r   r   cat  s   "zTritonSemantic.catabc                 C  s   |  ||\}}|jg k}|r| |d}| |d}t|jd tjr*td}nd}|j|g }t|jj|}| 	| j
|j|j|}|rR| j|dgdd}|S )Nr   r   Fr   )rt   r   r   rW   r,   r`   r   rb   ro   r#   r$   Zcreate_joinr   r   )r   r   r   Z
was_rank_1two	new_shaper   r   r   r   r   join  s   
zTritonSemantic.joinc                 C  sr   t |jdks	J t|jd dksJ |jd d }t|jj|}| j|j	\}}| 
||| 
||fS )Nr   r   r   )r   r   r,   r   r   rb   ro   r$   Zcreate_splitr   r#   )r   r   r   r   ZoutLHSZoutRHSr   r   r   split  s   

zTritonSemantic.splitdims
Tuple[int]c                   s   t  jt |krtdtdd |D ttt |kr%td| t jj	 fdd|D }| 
| j j||S )Nz5permute dims must have the same length as input shapec                 s  s    | ]}t |V  qd S r%   r   r   dr   r   r   	<genexpr>  s    z)TritonSemantic.permute.<locals>.<genexpr>z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                   s   g | ]} j | qS r   r   r   r   r   r   r     r   z*TritonSemantic.permute.<locals>.<listcomp>)r   r   r+   sortedlistr   r,   r   rb   ro   r#   r$   Zcreate_transr   )r   r   r   r   r   r   r   permute  s   "zTritonSemantic.permutec                 C  s   |j  s| ||S |j  }t|t|kr"td| d| ||kr(|S t|D ]#\}}|| |krO|dkrOtd||  d| d| d| d| 
q,t|j j	|}| 
| j|j||S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rb   r   r   get_block_shapesr   r+   	enumerater,   r   ro   r#   r$   create_broadcastr   )r   r   r   Z	src_shapeiitemr   r   r   r   broadcast_impl_shape  s*   

z#TritonSemantic.broadcast_impl_shapec              	   C  sj  |j }|j }| r(| s(||j}| | j|| j|j|}||fS | sJ| rJ||j}| | j|| j|j|}||fS | r1| r1|	 }|	 }t
|t
|k rtt
|t
|D ]}| | j|jdt|jdg|j }|j }|	 }qmn0t
|t
|k rtt
|t
|D ]}| | j|jdt|jdg|j }|j }|	 }qt
|t
|ksJ g }t|D ]3\}	}
||	 }|
dkr|| q|dks||
kr||
 qtdt|	 d t|
 d t| ||krt|j|}| | j|j||}||kr1t|j|}| | j|j||}||fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index r  r   )rb   r   r   ro   r#   r$   r   r   r   r  r   r   r   r,   r   valuesr  appendr+   strr  )r   ri   rk   Zlhs_tyZrhs_tyZ	lhs_shapeZ	rhs_shape_	ret_shaper  leftrightr   r   r   r   rt     sl    + '



z#TritonSemantic.broadcast_impl_valuerounding_modeOptional[str]c                 C  s<   |d u rd S |dkrt jjS |dkrt jjS td| d)NZrtneZrtzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNEZRTZr+   )r   r  r   r   r   _str_to_rounding_mode  s   z$TritonSemantic._str_to_rounding_modedst_tyc                 C  s   |j }| r||j}||kr|S |j}|j}| s!| r'| ||S |j}|j}||kr?tdt| d t| | 	| j
|j|| j
|S )Nz!Cannot bitcast data-type of size z to data-type of size )rb   r   r   ro   rf   rs   primitive_bitwidthr+   r  r#   r$   create_bitcastr   r   )r   r   r  src_ty
src_sca_ty
dst_sca_tyZsrc_bitsZdst_bitsr   r   r   r     s     zTritonSemantic.bitcastfp_downcast_roundingc                 C  sr  |j }|j}|j}||kr|S | r||}| |}d}| r?| r?|j|jk r?|d u r6tjj	}qQ|tjj	kr>d}n|d urQt
dt| d t| | sY| rr| jjdd usfJ d| jjd |||| dS | rz| s| r| s|r| | j|j|| j||S | r| r| r| s| | |tj|S | o| o|j|jk}|r| | j|j|| j|S | o| o|j|jk }	|	r| | j|j|| j|S | r@| r@|j|jks|j|jkr@|  o|!  }
|! r/|j"| j}| | j#||j"}| $||S | | j%|j|| j|
|S |& r| r|! rg|j"| j}| | j#||j"}| $||S |  r|| | j'|j|| j|S | | j(|j|| j|S | r|& r|! s|  s| | j)|j|| j|S | | j*|j|| j|S |+ r| r|j}|dkr| | j,|j|| j|S |d	kr| $| |tj-| | j.d
tj-S | r|+ r| | j/|j|| j|S |+ r/|+ r/| | j0|j|| j|S J d| d| )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is Zconvert_custom_typesz0target doesn't provide conversion for this type.)Z	_semanticr{   r   r   zcannot cast z to )1rb   ro   r   r   r  rg   r  r   r  r  r+   r  is_fp8e4b15r$   codegen_fnsgetrI   r#   Zcreate_fp_to_fpr   r   rG   rF   rH   rs   r,   rD   Zcreate_fp_truncZcreate_fp_extrJ   r5   r6   r   is_boolr7   r   r   r   Zis_standard_floatingZcreate_fp_to_siZcreate_fp_to_uiZcreate_ui_to_fpZcreate_si_to_fprf   Zcreate_ptr_to_intr[   	get_int64Zcreate_int_to_ptrr  )r   r   r  r  r  r  r  Zuse_custom_roundingZtruncate_fpZext_fpZsign_extendtyr   bitwidthr   r   r   rs     s   



 
 




    
 
(  zTritonSemantic.castc                 C  s\   t jj}|r,|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S td| d|S )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGZCVr+   r   cache_modifiercacher   r   r   _str_to_load_cache_modifier     z*TritonSemantic._str_to_load_cache_modifierc                 C  sp   t jj}|r6|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S |dkr.t jj}|S td| d|S )Nz.wbr%  z.csz.wtr&  r'  )r   r(  r   ZWBr*  CSZWTr+   r+  r   r   r   _str_to_store_cache_modifier      	z+TritonSemantic._str_to_store_cache_modifierc                 C  sH   t jj}|r"|dkrt jj}|S |dkrt jj}|S td| d|S )NZ
evict_lastZevict_firstzEviction policy r'  )r   ZEVICTION_POLICYZNORMALZ
EVICT_LASTZEVICT_FIRSTr+   )r   eviction_policyevictionr   r   r   _str_to_eviction_policy  s   z&TritonSemantic._str_to_eviction_policyc                 C  sD   d }|r |dkrt jj}|S |dkrt jj}|S td| d|S )NzeronanzPadding option r'  )r   PADDING_OPTIONZPAD_ZEROPAD_NANr+   )r   padding_optionpaddingr   r   r   _str_to_padding_option  s   z%TritonSemantic._str_to_padding_optionc                 C  sp   t jj}|r6|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S |dkr.t jj}|S td| d|S )NacquirereleaseZacq_relrelaxedMemory semantic r'  )r   ZMEM_SEMANTICZACQUIRE_RELEASEZACQUIREZRELEASEZRELAXEDr+   )r   Z
sem_optionsemr   r   r   _str_to_sem  r2  zTritonSemantic._str_to_semc                 C  s\   t jj}|r,|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S td| d|S )NZgpuZctasysr@  r'  )r   ZMEM_SYNC_SCOPEZGPUZCTAZSYSTEMr+   )r   Zscope_optionscoper   r   r   _str_to_scope  r/  zTritonSemantic._str_to_scopec                 C  s   |rEt |ds
|g}dd |D }|D ]}t|tr(d|  kr't|k s*J  J qt|dks3J t|tt|ksAJ dt|S dS )N__iter__c                 S  "   g | ]}t |tjr|jn|qS r   rW   r,   r`   rA   r   elemr   r   r   r        " z?TritonSemantic._canonicalize_boundary_check.<locals>.<listcomp>r   z'Duplicate dimension in `boundary_check`r   )hasattrrW   r'   r   setr  )r   boundary_checkblock_shapedimr   r   r   _canonicalize_boundary_check  s   
,z+TritonSemantic._canonicalize_boundary_checkc	              
   C  s   |d us|d urt d|jjj}	|	tjksJ d|	 r(|tjjkr(t d|jj}
| 	||

 }| | j|j||||||
S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r+   rb   
element_tyr,   rY   rJ   r   r8  r9  rQ  r  r#   r$   Zcreate_tensor_pointer_loadr   )r   ptrmaskr   rN  r;  r-  r4  is_volatileelt_tyr  r   r   r   _load_block_pointer  s   
z"TritonSemantic._load_block_pointerc	              
   C  s  |j j std|j   d|d u r|d urtd|s!|r%td|j  s@|r5|j  r5td|r@|j  r@td|j  r_|d urR| ||j  }|d ur_| ||j  }|j j}	|	j}
|
t	j
k}|r}t	j}
t	|
|	j}	| ||	}|d ur| ||
}|j  r|j |
}n|
}|d u r| | j|j||||}n| | j|j|j|r|jnd ||||}|r| |t	j
}|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rb   ro   rf   r+   r   r   r
  r  rT  r,   rY   int8pointer_typeaddress_spacers   r   r#   r$   Zcreate_loadr   Zcreate_masked_load)r   rU  rV  r   rN  r;  r-  r4  rW  ptr_tyrX  r!  r  r   r   r   r   _load_legacy  sN   



zTritonSemantic._load_legacyrU  rV  Optional[TensorTy]rN  r   r:  r  r,  r3  rW  c	              
   C  sd   |  |}	| |}
| |}|j r&|jj r&| ||||||	|
|S | ||||||	|
|S r%   )	r.  r5  r<  rb   rf   rT  r   rY  r`  )r   rU  rV  r   rN  r:  r,  r3  rW  r-  r4  r;  r   r   r   load2  s   


zTritonSemantic.loaddesctl.tensor_descriptor_basec                 C  sz   t |tjsJ t|j}t||ksJ d| dt| | j|dd}| j|j|| 	|| 
|}| ||jS )N	expected  offsets, but got Frequire_i64)rW   r,   tensor_descriptor_baser   rO  _convert_to_ir_valuesr$   Zcreate_descriptor_loadr   r.  r5  r#   r   )r   rc  offsetsr,  r3  ndimrc   r   r   r   descriptor_load@  s   
$zTritonSemantic.descriptor_loadc                 C  sR   t |tjsJ t|j}t||ksJ d| dt| |j|jks'J d S )Nre  rf  )rW   r,   ri  r   rO  r   )r   rc  rA   rk  rl  r   r   r   validate_store_likeK  s   
$z"TritonSemantic.validate_store_likec                 C  s:   |  ||| | j|dd}| | j|j|j|tjS NFrg  )rn  rj  r#   r$   Zcreate_descriptor_storer   r,   void)r   rc  rA   rk  r   r   r   descriptor_storeQ  s   zTritonSemantic.descriptor_storec                 C  sn   |  ||| |jtjtjtjtjtjtjhv sJ d| j	|dd}t
jj}| | j||j|j|tjS NUnsupported dtypeFrg  )rn  r7   r,   rZ   r-   r\   rD   rB   rC   rj  r   DESCRIPTOR_REDUCE_KINDADDr#   r$   create_descriptor_reducer   rp  r   rc  rA   rk  r@   r   r   r   descriptor_atomic_addV  s
   * z$TritonSemantic.descriptor_atomic_addc                 C  s   t j }|jdko|jdkS )NcudaZ   )r
   ZactiveZget_current_targetbackendarch)r   targetr   r   r   _has_native_tma]  s   
zTritonSemantic._has_native_tmac                 C  sP   |t jt jt jt jt jt jhv sJ d|t jt jhv r$|  s&J dd S d S )Nrs  z-16-bit float types require native tma support)r,   rZ   r-   r\   r[   rB   rC   r~  )r   r7   r   r   r   $_descriptor_atomic_min_max_supporteda  s
   (z3TritonSemantic._descriptor_atomic_min_max_supportedc                 C  P   |  ||| | |j | j|dd}tjj}| | j	||j
|j
|tjS ro  )rn  r  r7   rj  r   rt  MINr#   r$   rv  r   r,   rp  rw  r   r   r   descriptor_atomic_minf  
    z$TritonSemantic.descriptor_atomic_minc                 C  r  ro  )rn  r  r7   rj  r   rt  MAXr#   r$   rv  r   r,   rp  rw  r   r   r   descriptor_atomic_maxm  r  z$TritonSemantic.descriptor_atomic_maxc                 C  f   |  ||| |jtjtjtjtjhv sJ d| j|dd}tj	j
}| | j||j|j|tjS rr  )rn  r7   r,   rZ   r-   r\   r[   rj  r   rt  ANDr#   r$   rv  r   rp  rw  r   r   r   descriptor_atomic_andt  
   " z$TritonSemantic.descriptor_atomic_andc                 C  r  rr  )rn  r7   r,   rZ   r-   r\   r[   rj  r   rt  ORr#   r$   rv  r   rp  rw  r   r   r   descriptor_atomic_or{  r  z#TritonSemantic.descriptor_atomic_orc                 C  r  rr  )rn  r7   r,   rZ   r-   r\   r[   rj  r   rt  XORr#   r$   rv  r   rp  rw  r   r   r   descriptor_atomic_xor  r  z$TritonSemantic.descriptor_atomic_xorc           
      C  sF  t |tjsJ |dksJ d|dksJ dt|jdks'J d|j |jd dks6J d|j t|jdksEJ d	|j |jd d
ksTJ d|j |j}d|j d
 }|jd |ksuJ d| d| d|jd  t|j|jd |jd g}| j	|fddd }| j
|j|j||| j
}	| |	|S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got r   zdescriptor gather of  must have at least  columns, but got Frg  )rW   r,   ri  r   rO  r   r7   r  r   rj  r$   Zcreate_descriptor_gatherr   r   r#   )
r   rc  	x_offsetsy_offsetr,  r3  r7   min_colsrb   rc   r   r   r   descriptor_gather  s(   z TritonSemantic.descriptor_gatherc                 C  s  t |tjsJ t|jdksJ d|j |jd dks&J d|j t|jdks5J d|j |jd dksDJ d|j |j}d	|j d }|jd |kseJ d
| d| d|jd  | j	|fddd }| j
|j|j|j| | d tjS )Nr   r  r   r   r  r  r  z6descriptor scatter must have at least 8 rows, but got r   zdescriptor scatter of r  r  Frg  )rW   r,   ri  r   rO  r   Zshapaer7   r  rj  r$   Zcreate_descriptor_scatterr   r#   rp  )r   rc  rA   r  r  r7   r  r   r   r   descriptor_scatter  s"   z!TritonSemantic.descriptor_scatterc           	   	   C  s   |d urt d|jj }|j s| ||}|j s"J d||j ks7J d| d|j  d|jjj|jjksPJ d|jjj d|jj d|jjj}|tjks^J d| ||}| 	||}| 
| j|j|j|||tjS )	NrR  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(rS  )r+   rb   rT  r  r   r
  r,   rY   rQ  rs   r#   r$   Zcreate_tensor_pointer_storer   rp  )	r   rU  valrV  rN  r-  r4  rO  rX  r   r   r   _store_block_pointer  s"   
2
z#TritonSemantic._store_block_pointerc           	   	   C  s:  |j j std|j   d|rtd|j  s0|j  r%td|r0|j  r0td|j  rK| ||j  }|d urK| ||j  }|j j}|j}|t	j
krgt	j}t	||j}| ||}| ||}|d u r| | j|j|j||t	jS |j j std| | j|j|j|j||t	jS )NrZ  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr[  "Mask must have boolean scalar type)rb   ro   rf   r+   r   r   r
  r  rT  r,   rY   r\  r]  r^  rs   r#   r$   Zcreate_storer   rp  r!  Zcreate_masked_store)	r   rU  r  rV  rN  r-  r4  r_  rX  r   r   r   _store_legacy  s6   



 zTritonSemantic._store_legacyr  c           	      C  sp   |  |}| |}|j s|jj rtd|j r.|jj r.| 	||||||S | 
||||||S )N"Cannot store to a constant pointer)r1  r5  rb   is_constro   r+   rf   rT  r   r  r  )	r   rU  r  rV  rN  r,  r3  r-  r4  r   r   r   store  s   

zTritonSemantic.storecmprA  rD  c              	   C  sT   |  |}| |}|jjj}|jdvrtd| | j	|j
|j
|j
|||jS )N)   r   r{   z9atomic_cas only supports elements with width {16, 32, 64})rB  rE  rb   ro   rT  r  r+   r#   r$   Zcreate_atomic_casr   )r   rU  r  r  rA  rD  rT  r   r   r   
atomic_cas  s   



$zTritonSemantic.atomic_casop#Tuple[TensorTy, TensorTy, TensorTy]c                 C  sj  |j j std|j   |j  s|j j rtd|j jj}|tju r4|dkr4td| d |tj	u rE|dkrEtd| d |tj
tjfv sR|jdk r^td| d t| |j  r}|d urp| ||j  }|d ur}| ||j  }| ||j jj}|d u r| jd	}tj}|j  r|j tj}| j|| j|}| ||}|||fS )
Nz)Pointer argument of store instruction is r  r   Zatomic_z does not support fp16z does not support bf16r  z does not support T)rb   ro   rf   r+   r   r  rT  r,   rB   rC   Zint16uint16r  r  r   r
  r  rs   r$   rX   rY   r   r   r   r#   )r   rU  r  rV  r  rT  Zmask_irZmask_tyr   r   r   atom_red_typechecking_impl  s2   



z)TritonSemantic.atom_red_typechecking_implc                 C  s@   |j j}tj|dd}| ||}| ||d }| |tjS )NF)r$  signedr   )r7   r  r,   Zget_int_dtyper   r   rs   rY   )r   rc   r$  ZidtypeZixZsignbitr   r   r   _signbit6  s
   zTritonSemantic._signbitc                 C  s  |  |||d\}}}| |}| |}|jj}| rK| r6| | j	t
jj|j|j|j|||jS | | j	t
jj|j|j|j|||jS |tjtjhvrZtd| |tjkrbtjntj}| ||}| |t|d}	|tjkr}tjntj}
| ||
}| |t|
d}| |}| |}| | j	t
jj|	j|j| ||j|||j}| | j	t
jj|j|j| ||j|||j}| |||}| ||S )Nr   z#atomic_max not supported for dtype r   )r  rB  rE  rb   ro   rJ   r   r#   r$   create_atomic_rmwr   	ATOMIC_OPr  r   UMAXr,   rD   rE   r8   r-   r[   r   r]  rZ   r\   r  r   r~   UMINwherer   rU  r  rV  rA  rD  sca_tyZi_typeZi_valZi_ptrZui_typeZui_valZui_ptrnegposZpos_retZneg_retr   r   r   r   
atomic_max=  L   



zTritonSemantic.atomic_maxc                 C  s  |  |||d\}}}| |}| |}|jj}| rK| r6| | j	t
jj|j|j|j|||jS | | j	t
jj|j|j|j|||jS |tjtjhvrZtd| |tjkrbtjntj}| ||}| |t|d}	|tjkr}tjntj}
| ||
}| |t|
d}| |}| |}| | j	t
jj|	j|j| ||j|||j}| | j	t
jj|j|j| ||j|||j}| |||}| ||S )Nr   z#atomic_min not supported for dtype r   )r  rB  rE  rb   ro   rJ   r   r#   r$   r  r   r  r  r   r  r,   rD   rE   r8   r-   r[   r   r]  rZ   r\   r  r   r~   r  r  r  r   r   r   
atomic_minc  r  zTritonSemantic.atomic_minc              
   C  sp   |  |||d\}}}| |}| |}|jj}| r!tjjntjj	}| 
| j||j|j|j|||jS )Nr   )r  rB  rE  rb   ro   rg   r   r  ZFADDru  r#   r$   r  r   )r   rU  r  rV  rA  rD  r  r  r   r   r   
atomic_add  s   

zTritonSemantic.atomic_addc              
   C  T   |  |||d\}}}| |}| |}| | jtjj|j	|j	|j	|||j
S )Nand)r  rB  rE  r#   r$   r  r   r  r  r   rb   r   rU  r  rV  rA  rD  r   r   r   
atomic_and     

"zTritonSemantic.atomic_andc              
   C  r  )Nor)r  rB  rE  r#   r$   r  r   r  r  r   rb   r  r   r   r   	atomic_or  r  zTritonSemantic.atomic_orc              
   C  r  )Nxor)r  rB  rE  r#   r$   r  r   r  r  r   rb   r  r   r   r   
atomic_xor  r  zTritonSemantic.atomic_xorc              
   C  r  )NZxchg)r  rB  rE  r#   r$   r  r   r  ZXCHGr   rb   r  r   r   r   atomic_xchg  s   

zTritonSemantic.atomic_xchgc                 C  sL   |  | jjjv sJ d| jjj d| | }|dkr d}ttj|S )Nzinput_precision must be one of . Got ZTF32X3ZTF32x3)lowerr$   r|   Zallowed_dot_input_precisionsupperr   r   ZINPUT_PRECISION)r   input_precisionr   r   r   _str_to_dot_input_precision  s   z*TritonSemantic._str_to_dot_input_precisionaccr  max_num_imprecise_acc	out_dtypec              
   C  s  |j  r
|j  sJ |j r|j rn@|jtjtjtjtjtj	fv s.J d|j |jtjtjtjtjtj	fv sEJ d|j |j|jksWJ d|j d|j |j
 sa|j
 r{d| jjjv rmtd | |tj}| |tj}|d u r| jjj}| |}t|j}t|j}||  krdksn ||  krdksn J d	|j d
|j d|jd j|jd jksJ d|j d|j d|jd j d|jd j d	| jjdd usJ d| jjd |j |j }	|jd j|	d kr|jd j|	d kr|jd j|	d ks)J d|	d  d|	d  d|	d  |j j rF|j jtjks<J d| jd}
tj}n4| rOtd|j j s]|j j rg| jd}
tj	}n|  rr| j!dn| jd}
|}|j jd }|j jd }|j jd }|dkr|j jd nd }t"||r|||gn||g}|d u r| j#|$| j|
}n|j%}|j |ksJ |d u r|j r|j r| jjj&}qd}n|j r|j r||krtd| d| d| '| j(|j%|j%||||S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   Zfp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releaser      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r   r   zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ())rb   r   r7   rI   r,   r\  uint8rB   rC   rD   r  r$   r|   Z!deprecated_fp8_dot_operand_dtypeswarningswarnrs   Zdefault_dot_input_precisionr  r   r   rA   r  r   ro   rJ   	get_int32r-   rH   r+   rF   get_fp32rG   Zget_fp16r   r   r   r   Zmax_num_imprecise_acc_defaultr#   Z
create_dot)r   ri   rk   r  r  r  r  lhs_rankrhs_rankr  r   Zret_scalar_tyMNKBr   
acc_handler   r   r   dot  s   

$



F0, 
" 

"zTritonSemantic.dotfloat_formatc                 C  s.   t tj| d }|d u rtd| d|S )NzInvalid float format: rR   )r   r   ZScaleDotElemTypeTYr  r+   )r   r  Zty_enumr   r   r   _str_to_fp_type	  s   zTritonSemantic._str_to_fp_typec                 C  s   t jt jt jt jd|}|du r-|dksJ d| |jt jks+J d|j |S |j|kr4|S t jt jt jt jd| }|j|ksQJ d| d|j | 	||S )z
        If float_format is subbyte, make sure it's packed as uint8 and return it.
        Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
        )e5m2e4m3bf16fp16Ne2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r,   Zfloat8e5Z
float8e4nvrC   rB   r   r7   r  r  r   )r   r  r  Z	triton_tyZunsigned_tyr   r   r   _bitcast_to_fp_type  s   
 z"TritonSemantic._bitcast_to_fp_type	lhs_scale
lhs_format	rhs_scale
rhs_formatTensorTy | None	fast_math
lhs_k_pack
rhs_k_packc           !      C  s  |j  r
|j  sJ t|j}t|j}||  kr dks9n ||  kr+dks9n J d|j d|j d|j}|j}| |}| |}h d}||v sXJ d| ||v scJ d| |d u pqt|tjoq|jd u }|d u pt|tjo|jd u }| 	||}| 	||}|	s|d	ksJ d
|
s|d	ksJ d
|j jdd  \}}|j jdd  \}}|d	krdnd}|d	krdnd}|	r|| n|}|
r|| n|}||ksJ d|j d|j d|dkr|j jd nd }|	s|| }|
s|| }t
||r	|||gn||g}| jd}|d u r%| j|| j|}n|j}|j |ks0J |r5d n|j}|r=d n|j} | | j|j| ||j||||	|
|
|S )Nr   r  r  r  r   >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  zBonly mxfp4 inputs can be packed along a dimension different than Kr  r   zCReduction dimension should pack the same number of elements; (lhs: r   )rb   r   r   r   rA   r  rW   r,   r`   r  r   r$   r  r   r   r   r#   Zcreate_dot_scaled)!r   ri   r  r  rk   r  r  r  r  r  r  r  r  r  Zlhs_format_enumZrhs_format_enumZallowed_formatsZrhs_scale_is_noneZlhs_scale_is_noner  ZK_LHSZK_RHSr  ZPACKED_AZPACKED_BZPACKED_A_DIMZPACKED_B_DIMr  r   r   r  Zrhs_scale_handleZlhs_scale_handler   r   r   
dot_scaled!  sV   

F

" 

zTritonSemantic.dot_scaled	conditionc                 C  s   |j tjkrtd|j   | |tj}| ||dd\}}|j r6| 	||\}}| 	||\}}n| 	||\}}|j}| 
| j|j|j|j|S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r7   r,   rY   r  r  rs   rx   rb   r   rt   r#   r$   Zcreate_selectr   )r   r  rc   r   r  r   r   r   r   r  U  s   

zTritonSemantic.wherec                 C  s"   |r	t ||}n|}| ||S r%   )r,   r   r#   )r   rc   rK   r  Zres_tyr   r   r   wrap_tensori  s   zTritonSemantic.wrap_tensorinputsSequence[TensorTy]Tuple[TensorTy, ...]c                   s    d u rt fddD d d jjt} |k s'J d| d fddtD tfddD sAJ d	jd
d D  |  sWJ t fddt	tD S )Nc                 3  s&    | ]} j ||jjgd dV  qdS )Tr   N)r   r   rA   r   tr   r   r   r   s  s   $ z+TritonSemantic.reduction.<locals>.<genexpr>r   z&reduction axis must be < inputs rank (r   c                   s   g | ]
\}}| kr|qS r   r   )r   r  r   )r&   r   r   r   y  s    z,TritonSemantic.reduction.<locals>.<listcomp>c                 3  s    | ]	}|j j kV  qd S r%   )rb   r   r  r   r   r   r   z  s    z-all reduction inputs must have the same shapec                 S     g | ]}|j qS r   r   r  r   r   r   r   |      c                 3  s,    | ]} | | jjV  qd S r%   r  Z
get_resultrb   ro   r   r  )r  	reduce_opr  r   r   r   r     s    
)
tuplerb   r   r   r  allr$   Zcreate_reduceverifyr   )r   r  r&   region_builder_fnrankr   )r&   r  r  r  r   r   r   	reductionq  s   
zTritonSemantic.reductionreversec                   s    d j jt}| |  kr|k s!n J d| d| d|dk r)||7 } D ]}|j jks7J dq+jdd  D |||  sOJ t fdd	tt D S )
Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec                 S  r  r   r  r  r   r   r   r     r  z3TritonSemantic.associative_scan.<locals>.<listcomp>c                 3  s,    | ]} | | jjV  qd S r%   r  r  r  Zscan_opr   r   r   r   r     s   * z2TritonSemantic.associative_scan.<locals>.<genexpr>)rb   r   r   r$   Zcreate_scanr  r  r   )r   r  r&   r  r   r  r  r   r  r   associative_scan  s   .$zTritonSemantic.associative_scansrcindexc                 C  s   |j  s	J dt|jj}t|jj|ksJ d| |  kr&|k s2n J d| d| d|dk r:||7 }t|D ]}||krEq>|jj| |jj| ksYJ d| dq>| j|j|j|}| 	||jj
|jjS )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r   r   z
index dim z( must match the corresponding source dim)r7   rJ   r   rb   r   r   r$   Zcreate_gatherr   r  ro   )r   r  r  r&   r  r   gatherr   r   r   r    s   .*zTritonSemantic.gathernum_binsc                 C  s~   t |jdksJ d|j sJ d|d ur,| ||j}|jj s)td|j	}| 
| j|j	||ttj|gS )Nr   z histogram only supports 1D inputz%histogram only supports integer inputr  )r   r   r7   rJ   r
  rb   ro   r!  r+   r   r#   r$   Zcreate_histogramr,   r   r-   )r   r   r  rV  r   r   r   	histogram  s   zTritonSemantic.histogramr  c                 C  s@   t dt|jt|krtd|jdt||j  |S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r   r   r+   r   set_attrr   	make_attrget_contextr   rc   r  r   r   r   multiple_of  s   zTritonSemantic.multiple_ofc                 C  :   t |jt |krtd|jdt||j  |S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr   r   r+   r   r  r   r	  r
  r  r   r   r   max_contiguous     zTritonSemantic.max_contiguousc                 C  r  )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  r   r   r   max_constancy  r  zTritonSemantic.max_constancyc                 C  s   |  | j tjS r%   )r#   r$   Zcreate_barrierr,   rp  r  r   r   r   debug_barrier  r   zTritonSemantic.debug_barrierprefixargsList[TensorTy]hexc                 C  s   | ds|r|d7 }| ds|r|d d d }t|dkr)|ds)d| }dd |D }dd |D }| | j||||tjS )N r  r   r   c                 S  r  r   r  r   argr   r   r   r     r  z/TritonSemantic.device_print.<locals>.<listcomp>c                 S  s   g | ]}|j  qS r   )r7   r   r  r   r   r   r     r   )endswithr   
startswithr#   r$   Zcreate_printr,   rp  )r   r  r  r  new_args	is_signedr   r   r   device_print  s   zTritonSemantic.device_printr   r   c                 C  s(   | j jjsd S | | j |j|tjS r%   )r$   r|   debugr#   Zcreate_assertr   r,   rp  )r   r   r   r   r   r   r     s   
zTritonSemantic.device_assertc                 C  s   |  | j|jtjS r%   )r#   r$   Zcreate_assumer   r,   rp  )r   r   r   r   r   assume  s   zTritonSemantic.assumec                 C  s>  t |tr
t|}t |tjrWt |jtr| j|jS |r;d|j  kr*dk s4n J d|j d| j|jS d|j  krFdk sPn J d|j d| j	|jS t |tj
r|jjdksgJ d	|j spJ d
|jtjkr|r| j|j| j |j S |jtjkr|sJ d|jS J dt| )NrP   rQ   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerN   rO   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rW   r'   r,   r`   rA   r;   r$   rX   r"  r  r#   r   r7   rJ   r[   r   r   Zget_int64_tyr   r-   rb   )r   rJ  rh  r   r   r   _convert_elem_to_ir_value  s2   



z(TritonSemantic._convert_elem_to_ir_valuec                   s,   t |dr fdd|D S | gS )NrF  c                   s   g | ]} | qS r   )r!  rI  rh  r   r   r   r     s    z8TritonSemantic._convert_to_ir_values.<locals>.<listcomp>)rL  r!  )r   	list_likerh  r   r"  r   rj    s   
z$TritonSemantic._convert_to_ir_valuesbasec              	     s:  |  |}|  |}| j |dd}|j r|jj r td|jjtjkr4| |t	tj
|jj}t ds< g dd  D  tdd  D sPJ d	t|dsX|g}d
d |D }t|ttt|ksoJ dt fdd||||fD sJ d| j|j||| |}| |t	t|jj S )NFrg  zMExpected `base` to be a pointer type (but not a block pointer type or others)rF  c                 S  rG  r   rH  rI  r   r   r   r     rK  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>c                 s  s2    | ]}t |tod |  kodk n  V  qdS )rN   rO   N)rW   r'   rI  r   r   r   r     s   0 z0TritonSemantic.make_block_ptr.<locals>.<genexpr>zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                 S  rG  r   rH  rI  r   r   r   r   #  rK  z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc                 3  s     | ]}t  t |kV  qd S r%   )r   )r   r#  rO  r   r   r   '  s    zBExpected shape/strides/offsets/block_shape to have the same length)rj  rb   rf   rT  r   r+   r,   rY   rs   r]  r\  r^  rL  r  r  r  r   r   r$   Zcreate_make_block_ptrr   r#   r   )r   r$  r   stridesrk  rO  orderr   r   r%  r   make_block_ptr
  s,   



  zTritonSemantic.make_block_ptrc                 C  s(   | j |dd}| | j|j||jS ro  )rj  r#   r$   Zcreate_advancer   rb   )r   r$  rk  r   r   r   advance0  s   zTritonSemantic.advancer&  rO  List[tl.constexpr]tl.tensor_descriptorc                   s  t |}d|  krdksn td| dt ||kr)td| dt | t ||kr;td| dt | t|jtjsDJ |jjjd	 }t|d
 }|| dk rhtd| d| d||  dt|d
 |d
< |d
 dkrtd|d
   fdd|D } fdd|D }t	|}t|j
tjsJ t|j
j|}|j}	|j
j }
 j|	dd |D dd |D ||
}t||||S )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got r  r   r  zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got c                      g | ]	}  |tjqS r   )r   r,   r-   r   r  r   r   r   Q      z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>c                   r-  r   )r   r,   r[   r   r  r   r   r   R  r.  c                 S  r  r   r  r   r   r   r   r   r   \  r  c                 S  r  r   r  r/  r   r   r   r   ]  r  )r   r+   rW   r7   r,   r]  rT  r  r   Z_unwrap_shaperb   r   r   r   r$   Zcreate_make_tensor_descriptorZtensor_descriptor)r   r$  r   r&  rO  rl  Z	elem_sizeZcontig_dim_sizerb   Zbase_handleZis_signed_intr   r   r  r   make_tensor_descriptor7  s8   
z%TritonSemantic.make_tensor_descriptor)r&   r'   r(   r   )r2   r3   r4   r3   r(   r3   )r2   r3   r:   r;   r4   r3   r<   r;   r=   r;   r(   r3   )T)rM   r;   )r   r3   r   r3   rd   r;   r(   re   )FFTF)ri   rj   rk   rj   r(   rl   )ri   r   rk   r   ry   rz   )r   rj   r   rj   r}   r;   r(   r   )r   rj   r   rj   r(   r   )r   rj   r   rj   r   r;   r(   r   )rc   r   r   r   r   r   )rc   r   r   r   r   r   r   r   )r   r   r   r   r(   rl   )r   r   r   r   r(   r   )r   r   )r   r   r(   r   )r   r   r(   r   )r   r'   r   r'   r   r   r(   r   )r7   r3   r(   r   )r   r   r7   r3   r(   r   )rA   r   r   r   r(   r   )r   r   r   r   r   r;   r(   r   )r   r   r&   r'   r(   r   )ri   r   rk   r   r   r;   r(   r   )r   r   r   r   r(   r   )r   r   r(   rl   )r   r   r   r   r(   r   )r   r   r   r   r(   r   )ri   r   rk   r   r(   r   )r  r  )r   r   r  r3   r(   r   r%   )r   r   r  r3   r  r  r(   r   )rU  r   rV  ra  r   ra  rN  r   r:  r  r,  r  r3  r  rW  r;   r(   r   )rc  rd  r,  r  r3  r  r(   r   )rc  rd  rA   r   r(   re   )rc  rd  rA   r   r(   r   )r,  r  r3  r  r(   r   )rA   r   r(   r   )rU  r   r  r   rV  ra  r,  r  r3  r  r(   r   )rU  r   r  r   r  r   rA  r  rD  r  r(   r   )
rU  r   r  r   rV  r   r  r  r(   r  )rc   r   r(   r   )rU  r   r  r   rV  r   rA  r  rD  r  r(   r   )ri   r   rk   r   r  r   r  r  r  r'   r  r3   r(   r   )r  r  )r  r   r  r  )ri   r   r  r   r  r  rk   r   r  ra  r  r  r  r  r  r;   r  r;   r  r;   r  r3   r(   r   )r  r   rc   r   r   r   r(   r   )r  r  r&   r'   r(   r  )r  r  r&   r'   r   r;   r(   r  )r  r   r  r   r&   r'   r(   r   )r   r   r  r'   rV  ra  r(   r   )rc   r   r  r   r(   r   )r(   r   )r  r  r  r  r  r;   r(   r   )r   r   r   r  r(   r   )r$  r   r(   r   )
r$  r   r   r  r&  r  rO  r*  r(   r+  )yr   r   r    r,   r#   __annotations__langr   r/   r1   r9   rL   ra   rh   rx   r   r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r]   r   r   r   r   r   r   r   r   r  r
  rt   r  r   rs   r.  r1  r5  r<  rB  rE  rQ  rY  r`  rb  rm  rn  rq  rx  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r!  rj  r(  r)  r0  r   r   r   r   r"      s   
 	2)%		
	8	o<,&&	N4&r"   )
__future__r   r  typingr   r   r   r   r   r   r	   rm   Ztriton.runtimer
   Z_C.libtritonr   r  r   r,   r   r   	Exceptionr   r"   r   r   r   r   <module>   s    $	