o
    hB6                  #   @   s   U d dl Z d dlZd dlmZmZ d dlZd dlmZ ejdedZi Z	e
ejjef ed< eh dZded	ed
eegef fddZedd	d;dd d ddejdejdejdeej dededed
ejfddZdee ded
efddZdejdeded
ejfd d!Zd"ejd#ejd$ed%edee d&ed
ejfd'd(Zd$ed%ed
dfd)d*Zd"ejd#ejd$ed%edee d
ejfd+d,Zed-d			d<dd d d dd.dd/d"ejd#ejd0ejd1eej d2eej d3eej d4ed5ed6ed&edee d7ed8ee d
eejejejejf fd9d:ZdS )=    N)CallableOptional)_dtype_mappings_T)boundONNX_ATEN_DECOMP_TABLE>         
      op_typeopset_versionreturnc                    s   dt dt f fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    sP   d }t jjd  d| dd| }| tttt jj |< ||  |S )NZopsetzonnx::. )Zmutates_args)torchZlibraryZ	custom_opr   getattropsZonnxZregister_fake)r   overloadZtorch_opr   r   r   Z/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/torch/onnx/ops/_impl.py	decorator   s   

z_onnx_op.<locals>.decorator)r   )r   r   r   r   r   r   _onnx_op   s   r   ZRotaryEmbedding   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr   r   r   c                   sN   j d } j d }t j dkr2 j d }	t|dk fdd |	| }
||||
g}t | tt j dkdd   j d }
|dkrJ|
} d	d	d	d	d	d	d	|f } d	d	d	d	d	d	|d	f }|d }|d	ur{|| }|| }n|}|}|d	d	d	d	d	|f }|d	d	d	d	d	|f }t|d}t|d}|r|d	d	d	d	d	d	dd	df }|d	d	d	d	d	d	dd	df }n
tj|dd
d\}}|| ||  }|| ||  }|rt|d
}t|d
}tj||fd
d}t||j }n	tj||fd
d}tj||fd
d}t j dkr%t| j }|S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   r	         c                      s   d j  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape )shaper   r   r   r   <lambda>>   s    z%rotary_embedding_23.<locals>.<lambda>   c                   S      dS )Nzx should be a 4D tensor by nowr   r   r   r   r   r&   C       Ndim)r$   lenr   _checkZreshapeZ	unsqueezechunkcat)r   r   r    r!   r   r   r   
batch_sizesequence_lengthhidden_size	head_sizeZ	new_shapeZx_rotateZx_not_rotateZrotary_embedding_dim_halfcossinx1Zx2realimagZx_rotate_concatoutputr   r%   r   rotary_embedding_23+   sn   




  "$r;   scaler4   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)r<   r4   r   r   r   _get_scale_factor   s   r?   tensorr1   c                 C   s:   | j d | j d }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r	   r#   )r$   view	transpose
contiguous)r@   r1   r   r2   r3   r4   r   r   r   _reshape_3d_to_4d   s   rD   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec              	   C   s2   |dkrt | ||||S tt| |ddS )z1Get QK output tensor based on the specified mode.r   r*   )_compute_qk_output_for_mode_0r   Z
zeros_likematmulrB   )rE   rF   rG   rH   r<   rI   r   r   r   _get_qk_output_for_aten_spda   s
   	
rM   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rH   rG   r   r   r&      s    z-_validate_gqa_configuration.<locals>.<lambda>N)r   r.   )rG   rH   r   rN   r   _validate_gqa_configuration   s   
rO   c                 C   s`   |}||kr|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r	   r+   r"   rJ   r*   )repeat_interleaver?   r$   r=   r>   r   rL   rB   )rE   rF   rG   rH   r<   ZK_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r   rK      s   	
rK   Z	Attention        )	is_causalkv_num_headsq_num_headsrI   r<   softcapsoftmax_precisionV	attn_maskpast_key
past_valuerW   rX   rY   rZ   r[   c          (      C   s  d\}}}t | j}| jd }t | jdkr;t|dko|dkdd  | jd }t| ||} t|||}t|||}tt | jdkoQt |jdkoQt |jdkdd  | j| }t|
|}
|d	urmtj||g|d
n| }|d	ur~tj||g|d
n| }||}}| j| }|j| }| j| }|j| }|dko|	dko|d	u o|d	u p|jtj	k}t
|| |rd	}|d	ur|jtj	kr| n|}tjjj| |||d||
t	||kd}t| ||||
|	}n||kr|| }|j||d
}|j||d
}tj||| j| jd}|r+t|d	u dd  ttj||tj	| jd}|| td}|d	urE|jtj	krA|| td}n|| }t|
| jd } t| }!| |! }"||! }#t|"|#dd}$|$}|$| }%|	dkrq|%}|dkr|t|%|  }%|	dkr|%}|d	ur|tv r|%j}&|%tj| }%tj|%dd
}'|'|&}'qtj|%dd
}'ntj|%dd
}'|	dkr|'}t|'|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r	   r#   r"   r   r"   c                   S   r(   )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r   r&      r)   zattention_23.<locals>.<lambda>r	   r'   c                   S   r(   )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r   r&      r)   Nr+   rV   )r]   Z	dropout_prW   r<   Z
enable_gqa)dtypedevicec                   S   r(   )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r   r&   @  r)   z-infrJ   r*   r#   )"r-   r$   r   r.   rD   r?   r0   cloner`   boolrO   nnZ
functionalZscaled_dot_product_attentionrM   rP   Zzerosra   ZtrilZonesZmasked_fillfloatr=   r>   rL   rB   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ZONNX_DTYPE_TO_TORCH_DTYPEZsoftmaxrC   rA   )(rE   rF   r\   r]   r^   r_   rW   rX   rY   rI   r<   rZ   r[   Znum_head_dimZsequence_dimZhead_dimZinput_shape_lenr1   Zq_sequence_lengthZq_head_sizeZpresent_keyZpresent_valuerG   rH   Zkv_sequence_lengthZcan_use_sdpaZsdpa_attn_maskr:   Z	qk_outputrQ   Z	attn_biasZcausal_maskrR   rS   rT   rU   Zqk_matmul_outputZqk_with_biasZoriginal_dtypeZ
qk_softmaxr   r   r   attention_23   s   



(
















ri   )N)NNN)r=   typingr   r   r   Ztorch.onnx.opsr   TypeVarr   r   dictZ_opsZ
OpOverload__annotations__	frozensetrg   strintr   ZTensorrc   r;   re   r?   rD   rM   rO   rK   tupleri   r   r   r   r   <module>   s   
 
	U




	
