o
    h*                 !   @   sD3  U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl m	Z	m
Z
 d dlmZmZ d dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlm  mZ d dlmZm Z m!Z! d dl"m#Z# d d	l$m%Z% d d
lm&Z&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5j6Z6g Z7e8e9 e:d< ej;j<j=Z=G dd deZ>		d~dedej?de@de@fddZAe	eAej?jBddZCe	eAej?jBdZDe	eAej?jBddZEe	eAej?jFdZGde!deHde!fdd ZIe#e=jJe/d!eDd"e!d#e!fd$d%ZJe#e=jKe/d!eDd"e!d#e!fd&d'ZKe#e=jLe/d!eDd"e!de!d(eMd)eMfd*d+ZLe#e=jNe/d!eDd,e!d-eMd.eMd/eMd0e@d1e!fd2d3ZNe#e=jOjPgd4d5 ZQe#e=jOj!gd6e!fd7d8ZRe#e=jSe/ eDd9e!de!fd:d;ZSe#e=jTe/d!eDd,e!d9e!fd<d=ZTe#e=jUe/d!d,e!d9e!d>eMd?eMfd@dAZUe#e=jVe/ eDd9e!de!fdBdCZVe#e=jWe/ eDd,e!d9e!de!fdDdEZWe#e=jXe/d!d,e!d9e!d)eMfdFdGZXe#e=jYe/d!eDd,e!d9e!dHeMdIe@fdJdKZYe#e=jZe/d!eDddMe!d9e!dNe9fdOdPZZe#e=j[eDd,e!dQe!fdRdSZ[e#e=j\e/ eDd9e!de!fdTdUZ\e#e=j]e/d!eDd,e!d9e!de!fdVdWZ]e#e=j^d9e!dXe!de!fdYdZZ^e#e=j_d,e!d9e!dXe!de`e!e!f fd[d\Z_e#e=jae/ eDd,e!d9e!d]e!d^eMd_eMd`e@dIe@de!fdadbZae#e=jbe/d!eDd,e!d9e!dce!de!fdddeZbdfe!dgeHfdhdiZcdjejdfdkdlZee#e=jfe/ eDe>jgjhfd9e!dme!dgeHde!fdndoZfe#e=jie/d!eDd,e!dQe!dme!dgeHfdpdqZie#e=jjddrdsZke#e=jle/ eDe>jgjhdtfd9e!dme!dgeHd(eMfdudvZle#e=jmjneDd,e!d9e!dme!dgeHd(eMf
dwdxZme#e=jmjoeDd,e!d9e!dme!dgeHd(eMd!e!fdydzZpe#e=jqjneDd,e!d9e!dme!dgeHd{eMf
d|d}Zqe#e=jqjreDd,e!d9e!dme!dgeHd{eMd!e!fd~dZsd,e!d9e!dme!dXee! dgeHdeHde!de!fddZte#e=jue/d!eDd,e!d9e!deHde!fddZue#e=jve/d!d,e!d9e!dme!dXee! dgeHdeHde!de!fddZve#e=jwe/d!d,e!d9e!dme!dXee! dgeHdeHde!de!fddZwe#e=jxe/ eDde>jgjhfd9e!dme!dXee! dgeHde!f
ddZxe#e=jye/d!eDde>jgjhfd,e!d9e!dme!dXee! dgeHde!fddZye#e=jze/ eDe>jgjhfdQe!dme!dgeHde!fddZze#e=j{e/d!eDe>jgjhfd,e!d9e!dme!dgeHde!f
ddZ{e#e=j|e/ ddQe!de!deMfddZ|e#e=j}e/ de!de!de!fddZ}e#e=j~e/ d,e!de8eH deHdeHdeHdeHfddZ~e#e=jj!	 			dd9e!deHdeeH deeH deHf
ddZde!deHdeeH deeH de`eHeHf f
ddZe#e=je/ 	 			ddQe!de!deHdeeH deeH deHfddZe#e=je/ d,e!de8eH deHdeHfddZe#e=je/ d,e!de8eH deHdeHdeHf
ddZd,e!d!e!dejdfddZe#e=je/d!eCd,e!de!deHdejdfddZe#e=je/ eCd,e!de!deHdejdfddZdd Ze#e=je/ dQe!de8eH de8eH de8eH de8eH de!fddZe#e=je/ eDdQe!de8eH de8eH de8eH de8eH de8eH de!fddÄZe#e=je/ d,e!de!d.eMfddƄZe#e=je/ dMe!de8eH deHdeHdeHde!fdd˄Ze#e=jjneD	dd,e!d9e!deeM de!fdd΄Ze#e=je=jjne6je=jjne6jdQe!deMdee@ fddфZe#e=je/ddӃdQe!deMdee@ fddՄZe#e=je/ de!deHde@fdd؄Ze#e=je/ddٍde!deHde@fddۄZe#e=je/ 			ddXe!de!deHde@de@de!fddZe#e=je/ d,e!de!deHdeHde@f
ddZde8eH fddZde8e! deHdeHde8e! fddZde8e! fddZde8e! deHfddZde8e! deHdeHfddZe#e=jjne=jjrg	dde8e! deHdeHdee! de!f
ddZe#e=jjne=jjrg	 	dd9e!de8eH deHdee8e!  dee8e!  f
ddZe#e=jj!ddQe!deHdeHde`e!df fddZe#e=jjn	 ddQe!de8eH deHde`e!df fddZe#e=jj!dd9e!deHdeHde`e!df fddZe=jje6j	 dd9e!d e!deHde`e!df fddZe#e=je/ddٍeDdd9e!de!de!d(eHd-eHf
ddZe#e=je/ eD			dd9e!de!de!d(eHd-eHde@fdd	Ze#e=je/ddٍeDdd9e!de!d
e!d(eHd-eHf
ddZe#e=jjneDd,e!dQe!de!de!dee! deHdeHdeHdeHde8e@ de`ee! ee! ee! f fddZe#e=jjrd,e!dQe!de!de!dee! deHdeHdeHdeHde8e@ dej!dej!dej!de`ee! ee! ee! f fddZdee! dee! fddZe#e=jjnde!dQe!de8eH de!de!dXee! dee! de8e@ de`ee! ee! ee! f fdd Ze#e=jjrde!dQe!de8eH de!de!dXee! dee! de8e@ dej!dej!dej!de`ee! ee! ee! f fd!d"ZdQe!dXee! dee! d#ee! d$ee! d`e@d%eMdeMd&e@de`e!e!e!ee! ee! f fd'd(Ze#e=je/dd)d*dQe!dXee! dee! d#ee! d$ee! d`e@d%eMdeMde`e!e!e!f fd+d,Ze=jjne6je=jjne6jdQe!dXee! dee! d#ee! d$ee! d`e@d%eMdeMde`e!e!e!f fd-d.Ze=jjne6jdde8e! fd/d0Ze#e=jjndQe!dXee! dee! d#e!d$e!d%eMdeMde`e!e!e!f fd1d2Ze#e=jjndQe!dXee! dee! d#e!d$e!d`e@d%eMdeMde`e!e!e!f fd3d4Ze#e=jjdQe!dXee! dee! d`e@d%eMdeMde`e!e!e!f fd5d6Ze#e=jjndQe!dXee! dee! d#e!d$e!d`e@d%eMdeMde`e!e!e!e!e!f fd7d8ZdQe!dXee! dee! d#e!d$e!deMd`e@de!fd9d:Ze#e=jjndQe!dXee! dee! d#e!d$e!d%eMdeMde`e!e!e!e!f fd;d<Ze#e=jjndQe!dXee! dee! d#e!d$e!d%eMdeMde`e!e!e!e!e!e!f fd=d>Ze#e=jjndQe!dXee! dee! d#e!d$e!d%eMdeMde`e!e!e!e!f fd?d@Ze#e=je/ddӃeDddAdBZe#e=je/ dddddddCdee!e'f djeejd dDeej dEe@dFe@dGeej fdHdIZe#e=je=je=jge/ dJdK Ze=jjne6je#e=jÃe/ddӐddLdQe!dXe!dee! d#ee! d$ee! d`e@dMeMdNeMfdOdPZÐdQdR Ze#e=jjnde!dQe!dXee! d#ee! d$ee! d)ee! d*ee! de@deMde8e@ dSe!de`e!ee! ee! f fdTdUZe#e=jjnde!dQe!dXee! d#ee! d$ee! d)ee! d*ee! de@deMde8e@ de`e!ee! ee! f fdVdWZe#e=jjrde!dQe!dXee! d#ee! d$ee! d)ee! d*ee! de@deMde8e@ dej!dej!dej!de`e!ee! ee! f fdXdYZe#e=jȃe/ddӐddQe!d,e!dXe!d#ee! d$ee! d)ee! dZee! dNeMfd[d\Ze#e=jɃe/ddӐddQe!d,e!dXe!d#ee! d$ee! d)ee! dZee! dNeMd]e!fd^d_Ze#e=jʃe/ eDdQe!de`eHeHf fd`daZd9e)de)de8eH deHfdbdcZe#e=j̓e/ d9e)de)de8eH fdddeZe#e=j΃e/ dQe)de)de8eH de8eH de8eH f
dfdgZe#e=jσddhde)deHde)die)d-e'f
djdkZe#e=jЃe/ ddhde)deHde)die)d-e'f
dldmZddhde)deHde)die)dne@d-e'fdodpZe#e=jjne=jjne6jddrdsZe#e=jӃde)deHde)die)fdtduZe#e=jԃe/ de)deHde)die)fdvdwZde)deHde)die)dne@f
dxdyZe#e=jփe/ddceDd9e!de`e!e!f fdzd{Ze#e=j׃e/ 	q	t	dde!d|ee@eHeMf d}ee@eHeMf d~eej fddZe#e=jكdddZِdd Zڐdd Ze#e=jj݃e#e=jj݃e#e=jj݃e=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6jdQe!dee8eH  dee8eM  de!fddZe#e=jj݃e#e=jj݃e#e=jj݃e=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6je=jjݠe6jdQe!dee8eH  dee8eM  de!fddZdddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd	ddQe!de8eH deeM de!fddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd	ddQe!de8eH deeM de!fddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd		ddQe!de8eH deeM deeM de!f
ddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd		ddQe!de8eH deeM deeM de!f
ddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd			ddQe!de8eH deeM deeM deeM de!fddZe#e=jjne=jjrge=jjne6je=jjne6je/ddd			ddQe!de8eH deeM deeM deeM de!fddZeD	ddQe!de8eH de8eeM  de@de!f
ddZdd Zdd Zdd Zdd Z	dddZdd Zdd ZdddZdddZdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Zdd ZdddZdddÄZdĐdń Ze#e=jje=jje6je=jje6jdƐdǄ Ze#e=jje=jje6je=jje6jdȐdɄ Z dʐd˄ Zd̐d̈́ Ze#e=jje=jje6je=jje6jdΐdτ Ze#e=jje=jje6je=jje6jdАdф Ze#e=jj݃e=jjݠe6je=jjݠe6jdҐdӄ Ze#e=jj݃e=jjݠe6je=jjݠe6jdԐdՄ Z	e#e=j
j݃e#e=jj݃e=jjݠe6je=jjݠe6je=j
jݠe6je=j
jݠe6je=jjݠe6je=jjݠe6jd֐dׄ Ze#e=jjne=jjrge/ 	ddQe!de8eH de@deeM de!f
dِdڄZe#e=j
jne=j
jrge=j
jne6je/ 		ddQe!de8eH de@deeM deeM de!fdېd܄Z
e#e=jjne=jjrge/ 			ddQe!de8eH de@deeM deeM deeM de!fdݐdބZddߐdZdd Zdee! dee! de!de!fddZde*de!fddZeDdQe!de8eH de@de8eeM  de!f
ddZe#e=jjnde!de!de@fddZe#e=je=jge/ dd Ze#e=jgdd Ze#e=jgdddZe#e=jgdd Ze#e=jgdd Zd9e!dme!dXee! dgeHdeHde`e!e!f fddZe#e=je/ddd9e!dme!dXee! dgeHdeHde`e!e!f fddZe#e=je/ddd9e!dme!dXee! dgeHdeHde`e!e!f fddZde!deMde!fd dZde!deMde!fddZde!de*fddZde*de!de!fd	d
Z dee! de!fddZ!deHde@djejddDejfddZ"de!deHdeHde@fddZ#de!deHdeHdeHde@f
ddZ$de!de8eH de@fddZ%de!de8eH de@fddZ&e#e=j'e/ eDde!de8eH de@fddZ'	 	 		dde!de!deHd eHde@d!e@de!fd"d#Z(e#e=j)e/ eD	 	 	dde!de!deHd eHde@de!fd$d%Z)e#e=j*e/ddٍeDd&d' Z*e#e=j+e/ dde>jgjhfd(d)Z+d*ej!d+ej!d,e@de@fd-d.Z,e=j-jne6je=j-jre6je/dd/dd0d1d2Z-e#e=j.jne=j.jrge=j.jne6je/ eD		ddQe!de`eHeHf de@d3eeM d4eeM de!fd5d6Z/e#e=j.j݃e=j.jݠe6je=j.jݠe6je/ eD	dde!dee`eHeHf  de@dee`eMeMf  de!f
d7d8Z0e#e=j1e#e=j2e#e=j3eDe/ de!de`eHdf de!fd9d:Z4e#e=j5e#e=j6e#e=j7eDe/ de!de`eHdf de!fd;d<Z8de!de`eHdf d=eeHeHeHge!f de!fd>d?Z9e#e=j:e#e=j;e#e=j<e/d!d@dA Z=e#e=j>e/dBdCdddDdEdFZ>e#e=j?e/ dddGdHdIZ?e#e=j@jne=j@jrge/ dejAdddJde'djeejd dKejBdDeej dEe@f
dLdMZCe#e=j@jDgdejAdddJde'de'djeejd dKejBdDeej dEe@fdNdOZEe#e%dPdQ ZFe#e=jGe=jGjne6je/ ddde>jgjhfdQe!dme!de'dRe'dXee! dgeHde!fdSdTZGe#e=jHe=jHjne6je/ddUdQe!dme!dgeHde`e!e!f fdVdWZHe#e=jIjn	q	ddddXdYe!dZe!d6e!d[eMd\e@d]ee! d.eeM de`e!e!f fd^d_ZJd`da ZKe#e=jLge/ddٍeDddbdcZLe#e=jMe/ ddde ZMe#e=jNdfdg ZNe#e=jOjne=jOjrgdddhd9e!djeejd dee! de!fdidjZPe#e=jQjne=jQjRgdd9e!deeH fdkdlZSe#ej<j=jTddmdnZTe#e=jUe/ dddodpdqZUe#e=jVjnddrd9ej!d~eej dej!fdsdtZVddudvdwZWdddodxdyZXe#e=jYe/ dzd{ ZYe#e=jZdd|d}ZZeKe=j[e=j\ eKe=j]e=j eKe=j^e=j eKe=j_e=jL eKe=j`e=jO eKe=jae=jb eKe=jce=jV eKe=jde=je eKe=jfe=jS eKe=jge=jh eKe=jie=jj eKe=jke=jl eKe=jme=jn eKe=joe=jp eKe=jqe=jr eKe=jse=jt eKe=jue=jv eKe=jwe=jx eKe=jye=jz eKe=j{e=j| eKe=j}e=j~ eKe=je=j eKe=je=j eKe=je=j eKe=je=j\ dS (      N)Iterable)Enum)partialreduce)chainproduct)AnyCallablecastOptionalUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r(   r(   b/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr   0   s    r   Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                    s    t  fdd}|S )Nc                     s   rt tjjfnt f  fddtj| i |D }tj|di\fdd}fdd}t|| i t||}rA|S t||S )Nc                    s   g | ]	}t | r|qS r(   )
isinstance.0x)allowed_typesr(   r)   
<listcomp>D   s    z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                       t | tr
|  S | S Nr.   r   tor1   computation_dtyper(   r)   increase_precN      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r5   r6   r7   r9   )result_dtyper(   r)   decrease_precT   r=   z0type_casts.<locals>.inner.<locals>.decrease_prec)	r   torchtypesZ_NumberpytreeZarg_tree_leavesutilselementwise_dtypesr   )argskwargsZ	flat_argsr<   r?   rr,   r*   r-   r+   )r2   r;   r>   r)   inner?   s    


ztype_casts.<locals>.inner)	functoolswraps)r*   r+   r,   r-   rI   r(   rH   r)   
type_casts9   s    rL   T)r+   r,   )r+   )r+   r-   r1   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerM   	unsqueeze)r1   rM   _r(   r(   r)   _unsqueeze_to_dimv   s   rT   
grad_inputout_gradyc                 C   s   | d||     S Nr    Zconj_physicalrV   rW   r(   r(   r)   tanh_backward|      r[   c                 C   s   | |d|     S rX   rY   rZ   r(   r(   r)   sigmoid_backward   r\   r]   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)expr@   where)rV   r1   r^   r_   zr(   r(   r)   softplus_backward   s   "re   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )r@   rc   rb   )	rf   rg   rh   ri   rj   rk   ZnegcoefZposcoefZ
negiptcoefr(   r(   r)   elu_backward   s   rm   c                 C      t | |S r6   )r@   Z	full_likeselfvaluer(   r(   r)   fill_scalar      rr   rq   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrM   r(   rq   r(   r)   <lambda>       zfill_tensor.<locals>.<lambda>)r@   _checkrM   atencopyro   r(   ru   r)   fill_tensor   s
   

r{   rp   c                 C   s    t jt j| d ddddd S N   r   min   maxr@   clamprp   r(   r(   r)   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r@   rc   rf   rp   r(   r(   r)   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S )Nr   r   )rf   rp   r   r   r(   r(   r)   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S r|   r   r   r(   r(   r)   	hardswish   s   $r   c              
   C   s,   t |dkdt |dk | |d d  | S )Nr   r}         ?r   r   r(   r(   r)   hardswish_backward   s
   r   c                 C   s   t ||kd| S rl   r   )rf   rp   r_   r(   r(   r)   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S rl   r   )rf   rp   r   r   r(   r(   r)   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r    r}   g      )r@   r   erfrb   )r   rp   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberI   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr(   r(   r)   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rX   )r@   r   FZsoftplussigmoid)rf   r   Zinput_tanh_softplusZinput_sigmoidoutr(   r(   r)   mish_backward  s   
r   c                 C   s   | t |  S r6   )r@   r   r   r(   r(   r)   silu!  s   r   c                 C   s,   ddt |   }| | d|d|    S rX   )r@   rb   )rf   rp   r   r(   r(   r)   silu_backward(  s   r   weightc                 C   s   t | dk| ||  S rl   r   )rp   r   r(   r(   r)   _prelu_kernel0  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rf   rp   r   Z
input_gradZweight_gradr(   r(   r)   _prelu_kernel_backward5  s   r   noiseloweruppertrainingc                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r!   )mulry   r   )rf   rp   r   r   r   r   r   r   r(   r(   r)   rrelu_with_noise_backward@  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r    rP   )r@   rc   rb   abs)rf   rp   r   Zin_negativeZ	max_derivsignrd   r(   r(   r)   log_sigmoid_backwardU  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r6   )r   r&   rq   r@   meanr'   sum)r   r   r(   r(   r)   apply_loss_reductionb  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r6   )r@   Z	complex32Zfloat16Z	complex64float32Z
complex128Zfloat64r   r(   r(   r)   to_real_dtypek  s   


r   targetc                 C   s   | | d }t ||S )Nr!   )r   )rp   r   r   r   r(   r(   r)   mse_lossz  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r   r&   rq   numel)rf   r   r   r   normr(   r(   r)   mse_loss_backward  s   r   c                 C   sF   t j| ||d}| td}t j||dd}t |}t |||S )N)rM   r   z-infTrM   keepdim)r@   Zsoftmaxeqfloatall
zeros_likerc   )rp   rM   r   r   ZmaskedZmasked_rowszerosr(   r(   r)   safe_softmax  s
   
r   ra   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r!   )r   r@   rc   r   )rp   r   r   r^   r   r(   r(   r)   smooth_l1_loss  s   	&
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S r`   )r   r&   rq   r   r@   r   rc   r   )	rf   rp   r   r   r^   r   r1   Zabs_xZ	norm_gradr(   r(   r)   smooth_l1_loss_backward  s   

r   c                 C   *   t | ||||}t||j t||ddS NTZ	copy_fromZcopy_toexact_dtype)r   r   shaper   )rf   rp   r   r   r^   rU   resultr(   r(   r)   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S r`   )r   r&   rq   r   r@   rc   )rf   rp   r   r   r   r   r1   r(   r(   r)   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )rf   rp   r   r   r   rU   r   r(   r(   r)   huber_loss_backward_out  r   r   ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr!   r   r    g      c                 S   s   g | ]}d qS r    r(   r0   rS   r(   r(   r)   r3     rw   z&_nll_loss_backward.<locals>.<listcomp>)rM   r   r&   rq   rR   r@   rc   r   scatterrQ   r   reshape)rf   rp   r   r   r   r   r   channel_dimsafe_targetrU   Z	new_shaper(   r(   r)   _nll_loss_backward  s    	

 

r   c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr!   z.Halving dimension must be even, but dimension z	 is size ra   rt   )rM   rC   canonicalize_dimsizenarrowr@   r   cat)
rf   rp   rM   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr(   r(   r)   glu_backward  s   

r   c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r!   input tensor should be 1D or 2Dr    ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rP   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rM   r   r   r   r%   rq   r   )rf   rp   r   r   r   r   r   no_batch_dimr(   r(   r)   nll_loss_backward  s:   ("
r   c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r}   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r!   r    r   r   r   z ( z, elements))rM   r   r   r   )rf   rp   r   r   r   r   r   r(   r(   r)   nll_loss2d_backwardC  s0   r   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr    r(   i)r@   maximumlog1pnew_fulllogr   )rp   r   r   r   r   r(   r(   r)   binary_cross_entropyf  s   

r   c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r    r~   )r@   r   r   r&   rq   r   )rf   rp   r   r   r   ZEPSILONr   r(   r(   r)   binary_cross_entropy_backward|  s   
"r   c                 C   s    t t |  | }t||S r6   )r@   r   rb   r   )r   r   r   r   r(   r(   r)   soft_margin_loss  s   
r   c                 C   s6   ||  t || d  }|tjjkr||  }|S rX   )r@   r   r   r&   rq   r   )rf   rp   r   r   rU   r(   r(   r)   soft_margin_loss_backward  s   	r   r!   otherpc                 C   s   t j| | |dS )N)r   )ry   r   )r   r   r   r(   r(   r)   dist  r   r   x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr!   rP   Tmemory_formatr   )powr   r@   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r(   r(   r)   _euclidean_dist  s   r  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r6   )	new_zerosr@   slice_scatter)rf   r  rM   r  r	  r
  rU   r(   r(   r)   slice_backward  s   

r  r    c                 C   sz  ddl m}m} |  }|dkrtdt|  |}t|  }t| 	 }	|dkr0td|d ur6|nd}
|d ur>|nt
j}||
dk rM|
|| 7 }
||dk rY||| 7 }||
dk rbd}
n||
|| krn|| }
||t
jkrz|| }n|||
k r|
}n|||| kr|| }|  |
|	|   }||
 }|| d | ||< |	|  |9  < | jrtd| ||	|S )Nr   )guard_size_obliviousstatically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver    z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesr  r  rM   RuntimeErrorrC   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rp   rM   r  r	  r
  r  r  ndimsizesstridesZ	start_valZend_valr  lenr(   r(   r)   slice_forward  sD   	
r  c                    s@   | j |  dtf fdd}||d d}|||  }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rN   c                    s,   | d u r|S | dk r|   } t t| ||S rl   r   r   )valr   r   defaultdim_sizer(   r)   
clamp_wrap  s
   z(_normalize_start_end.<locals>.clamp_wrapr   )r   int)r1   rM   r  r	  r"  r(   r   r)   _normalize_start_end  s
   
r$  srcc              	   C   sB  t | j|}| j| }t| |||\}}t| j}|| |d  | ||< ||}|dkr;||kr;|dkr;| S d g|   }t	j
|| jd}	|	| | ||< t	j|| jt	jd}
|dkrht	|
|	|k}
||krtt	|
|	|k }
|dkrt	|
|	| | dk}
dg|   }d||< |
|}
t|
t||
|d| S )Nr    r   devicer'  r   rP   )rC   r   r  r   r$  r  expandclonerM   r@   aranger'  onesboollogical_andviewry   rc   _unsafe_masked_index)r   r%  rM   r  r	  r
  r!  Zsrc_sizeindicesidxmaskZ
mask_shaper(   r(   r)   r    s,   




r  indexc                 C   s   |  |}t|| ||S r6   )r  r@   select_scatter)rf   r  rM   r4  rU   r(   r(   r)   select_backwardE  s   
r6  offsetdim1dim2c                 C   s   |  |}t|| |||S r6   )r  r@   Zdiagonal_scatter)rf   r  r7  r8  r9  rU   r(   r(   r)   diagonal_backwardL  s   
r:  input_dtypec                 C   s   | j |kr
||}|S r6   )r   r8   )rf   rU   r;  r(   r(   r)   _cast_grad_to_input_dtypeU  s   

r<  outputc                 C   s0   | | }||t j||dd  }t| || S NTr   )r@   r   r<  
contiguous)rf   r=  rM   r;  Znew_grad_outputrU   r(   r(   r)   _softmax_backward_data]  s
   
r@  c                 C   s*   | t |t j| |dd  }t| ||S r>  )r@   rb   r   r<  )rf   r=  rM   r;  rU   r(   r(   r)   _log_softmax_backward_datao  s   
rA  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr!   r    r   r'  r   rP   )r   r@   r+  int64rR   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr'  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr(   r(   r)    _im2col_col2im_indices_along_dim{  s
   rD  kernel_sizedilationpaddingr  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr!   c                   S      dS )Nz"im2col(): only 2D kernel supportedr(   r(   r(   r(   r)   rv         zim2col.<locals>.<lambda>c                   S   rH  )Nz$im2col(): only 2D dilation supportedr(   r(   r(   r(   r)   rv     rI  c                   S   rH  )Nz#im2col(): only 2D padding supportedr(   r(   r(   r(   r)   rv     rI  c                   S   rH  )Nz"im2col(): only 2D stride supportedr(   r(   r(   r(   r)   rv     rI  Tc                 S   <   |rt dd | D nt dd | D }t|dd  d S )Nc                 s       | ]}|d kV  qdS r   Nr(   r0   r   r(   r(   r)   	<genexpr>      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS rL  r(   rM  r(   r(   r)   rN    rO  c                   S   rH  )Nz<{param_name} should be greater {'than' zero, but got {param}r(   r(   r(   r(   r)   rv     rI  z0im2col.<locals>.check_positive.<locals>.<lambda>r   r@   rx   param
param_namestrictcondr(   r(   r)   check_positive     (zim2col.<locals>.check_positiverE  rF  rG  FrU  r  r}   r   c                 s       | ]}|d kV  qdS rL  r(   r0   dr(   r(   r)   rN    rO  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler(   r   r(   r)   rv         c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r    r!   Nr(   r0   r   padZdilZkerstr(   r(   r)   rN    s
    "
r   c                 s   rK  rL  r(   )r0   cr(   r(   r)   rN    rO  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r_  r(   rF  rE  output_sizerG  r   r  r(   r)   rv     s    r   r   rP   r    r}      T)r@   rx   r  r   r   r`  ziprR   rD  r'  r   rd  permuter   r   squeeze)r   rE  rF  rG  r  rW  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr=  Znum_blocks_rowZnum_blocks_colr(   rk  r)   im2col  sd   	



 




r{  rl  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr!   c                   S   rH  )Nzonly 2D output_size supportedr(   r(   r(   r(   r)   rv     rI  zcol2im.<locals>.<lambda>c                   S   rH  )Nzonly 2D kernel supportedr(   r(   r(   r(   r)   rv     rI  c                   S   rH  )Nzonly 2D dilation supportedr(   r(   r(   r(   r)   rv     rI  c                   S   rH  )Nzonly 2D padding supportedr(   r(   r(   r(   r)   rv     rI  c                   S   rH  )Nzonly 2D stride supportedr(   r(   r(   r(   r)   rv     rI  Tc                 S   rJ  )Nc                 s   rK  rL  r(   rM  r(   r(   r)   rN    rO  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   rP  rL  r(   rM  r(   r(   r)   rN    rO  c                   S   rH  )Nz9{param_name} should be greater than zero, but got {param}r(   r(   r(   r(   r)   rv     rI  z0col2im.<locals>.check_positive.<locals>.<lambda>rQ  rR  r(   r(   r)   rW    rX  zcol2im.<locals>.check_positiverE  rF  rG  FrY  r  rl  )r!   r}   c                 s   r[  rL  r(   r\  r(   r(   r)   rN     rO  zcol2im.<locals>.<genexpr>r   c                      r^  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r_  r(   ra  r(   r)   rv     rb  r   r    c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r(   r(   )rE  r   r(   r)   rv     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r    r!   r(   rc  r(   r(   r)   r3     s    "zcol2im.<locals>.<listcomp>rP   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=rg  rh  ri  rj  z , expected input.size(-1) to be 	 but got rP   .r(   r(   LrF  rE  rl  rG  r   r  r(   r)   rv         c                      r}  r~  r(   r(   r  r(   r)   rv     r  r}   r   rm  c                 S   s   g | ]
\}}|d |  qS r!   r(   )r0   or   r(   r(   r)   r3   6      
accumulatern  )r@   rx   r  r   r   ro  rR   r   rp  rD  r'  rT   r  prodry   _unsafe_index_putr   rd  rq  )r   rl  rE  rF  rG  r  rW  r  Zprod_kernel_sizecolrr  Zout_hZout_wrs  rt  ru  rv  rw  rx  ry  rz  Zindices_rowZindices_colZoutput_padded_sizer=  r2  r(   r  r)   col2im  s   




 



"

r  r3  c                 C   s$   | | | |  jt| d}|S Nr   )type_asr*  rC   r   )rf   r3  rh   rG   r(   r(   r)   native_dropout_backwardC  s   	r  
input_size	dimensionr   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r(  rP   r    r6   Tr  )r  r@   Zsqueeze_copyrC   r   r+  r'  int32ZunfoldflattenZmovedimr  ry   r  r?  )	r   r  r  r   r
  rM   r2  rU   r4  r(   r(   r)   unfold_backwardR  s   
r  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nra   r   r(   nan)r@   rc   r.  r   r   )rf   rp   r  lohir(   r(   r)   logit_backwarde  s   r  trainc                 C   s&   |r|dkrt | ||d S |  S rl   )ry   native_dropoutr*  )r   r   r  r(   r(   r)   dropoutz  s   r  out0out1c                 C   s   |r6|dkr6|dkrt | t j| t jdfS | jjstdt | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r    r   z?result type Float can't be cast to the desired output type Longra   )	r@   r   r-  r   is_floating_pointr  	rand_liker   r   )r   r   r  Z	bool_maskresr(   r(   r)   r    s   r  half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr*t
| }ntj| |dd}t
| | }|tj||dd }|sJ||}|S Nr4   r   T)r   )r?  r   r@   halfrC   rD   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr8   r   rb   amaxr   )r1   rM   r  r;   r>   Zunnormalizedx_maxr   r(   r(   r)   _softmax  s   


r  )r   c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr'| }ntj
| |dd}| | }ttjt||dd}|| }|sL||}|S r  )r?  r   r@   r  rC   rD   r  r  r8   r   r  r   r   rb   )	r1   rM   r  r;   r>   Zshiftedr  Zshifted_logsumexpr   r(   r(   r)   _log_softmax  s    


r  rP   r1  padding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr!   z'weight' must be 2-Dr    r   )rM   r  Zindex_selectrq  )r   r1  r  r  r  r   r(   r(   r)   	embedding  s   	


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tr  rP   r   )rC   rD   r  r  r8   r   r@   longr  r   ry   r  rR   rT   r  masked_fillr   )rf   r1  r  r  r  r;   r>   countsr,  Zgrad_weights_scaler3  r   grad_weightr(   r(   r)   embedding_dense_backward  s&   	


r  c                 C   s   d}| D ]}||9 }q|S rX   r(   )r1   rG   ir(   r(   r)   r    s   
r  tensors
num_chunksc           	      C   s   g }| D ]H}|  }|| | d | | }||| kr7dgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr    r   r!   rP   )r   r  ry   constant_pad_ndr@   Sizeappendr   )	r  rM   r  padded_tensorstensortensor_sizeZpad_along_dimrd  Z	view_sizer(   r(   r)   
_pad_chunk  s   
r  c                 C   s(   | d j }| D ]
}|j |kr dS qdS )Nr   FTr  )r  r  r  r(   r(   r)   have_same_ndims  s   

r  c                 C   sB   | d   d | }| D ]}t|  d | |kdd  qd S )Nr   c                   S   rH  )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr(   r(   r(   r(   r)   rv   !  rI  z+leading_dimension_matches.<locals>.<lambda>)r   r@   rx   )r  rM   Zleading_dim_sizesr  r(   r(   r)   leading_dimension_matches  s   r  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]$}t | dkdd  t |j|kdd  t |j|kdd  q"t| rVt| d 	 |}nt |dkd	d  | D ]}t ||j
k d
d  qbt| | |S )Nr    c                   S   rH  )Nz&_chunk_cat expects positive num_chunksr(   r(   r(   r(   r)   rv   *  rI  z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   rH  )Nz0_chunk_cat expects a non-empty input tensor listr(   r(   r(   r(   r)   rv   ,  rI  c                   S   rH  )Nz#_chunk_cat expects non-empty tensorr(   r(   r(   r(   r)   rv   1  rI  c                   S   rH  )Nz8_chunk_cat expects all input tensors with the same dtyper(   r(   r(   r(   r)   rv   4  rI  c                   S   rH  )Nz8_chunk_cat expects all inputs tensors on the same devicer(   r(   r(   r(   r)   rv   8  rI  c                   S   rH  )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr(   r(   r(   r(   r)   rv   ?  rI  c                   S   rH  )Nz3_chunk_cat expects dim < ndim for all input tensorsr(   r(   r(   r(   r)   rv   D  rI  )r@   rx   r  r   r'  r   r  rC   r   rM   r  r  )r  rM   r  Zexpected_dtypeZexpected_devicer  r(   r(   r)   _preprocess_chunk_cat_inputs%  s:   


r  r   c                 C   sH   t | ||}t| ||}|d u rt||d S tj||d |d |S )Nr    )r   )r  r  r@   r   )r  rM   r  r   r  r(   r(   r)   
_chunk_catJ  s   r  split_sizesc                 C   sX   t j| ||d}|d u rdd |D S t||D ]\}}t||j t||dd qd S )Nrt   c                 S   s   g | ]	}|j tjd qS )r   )r*  r@   r  )r0   sr(   r(   r)   r3   f  s    z)split_with_sizes_copy.<locals>.<listcomp>Tr   )ry   split_with_sizesro  r   r   r   )rp   r  rM   r   Zsplitsr=  splitr(   r(   r)   split_with_sizes_copy[  s   	r  
split_size.c                 C      t j| ||S r6   )ry   r  r   )r   r  rM   r(   r(   r)   unsafe_splitn     r  c                 C   r  r6   )ry   r  r  )r   r  rM   r(   r(   r)   unsafe_split_with_sizess  s   r  c                    s   | j }|| } dkr|dksJ |  fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r    )	guard_intc                       g | ]} qS r(   r(   r0   r  r  r(   r)   r3     rw   zsplit.<locals>.<listcomp>rP   )r   detachr  r  rQ   r@   r  )rp   r  rM   r  r!  chunksr  r  r(   r  r)   r  z  s   
r  tensor_indices_or_sectionsc                    s   |j jdksJ |jtjksJ |  t dkp dk fdd  dkr9| }t|t	s3J | 
||S dd |D }| 
||S )Ncpur    r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr(   r(   Z	split_dimr(   r)   rv     s    zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r(   )itemr  r(   r(   r)   r3         zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)r'  typer   r@   rC  rM   rx   r  r.   r   tensor_split)rp   r  rM   sectionsr1  r(   r  r)   /tensor_split_tensor_indices_or_sections_py_impl  s   

r  mat1mat2c                 C   sH   |   s|  st|}t|}|t|| }|dkr|S |||   S rl   )r  
is_complexr#  r@   mm)rp   r  r  r^   rg   r   r(   r(   r)   addmm  s   r  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )r  is_cudary   gelurelu)rp   r  r  r^   rg   r  r   r(   r(   r)   _addmm_activation  s   

r  vecc                 C   s\   |   s|  st|}t|}|t|| }|dkr|S | dkr(||  S |||   S rl   )r  r  r#  r@   mvr   )rp   r  r  r^   rg   r   r(   r(   r)   addmv  s   r  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d r:d|
  }d urt|d|

d	}t|d|

d	}t|dd|
}n&||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r_|	|
|	|
d  |d j
dgd
 }|	d	 rk|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr(   r(   )r  r  r  r(   r)   rv     r  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got ra  r(   )r  r  r   r(   r)   rv         c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rP   )r   r(   )r  r  r(   r)   rv        $ r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r(   r(   )r  r  r(   r)   rv   	  rw   r!   rt   ra   rP   r    r&  r   )rC   Zcheck_same_deviceZcheck_same_shaper@   rx   r   r   divmodr   r/  r   rR   r   r,  r'  rT   r8   r   )rf   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biasr  Zds_valZdb_valc1c2c3r(   )r  r  r  r  r  r   r)   native_group_norm_backward  s   
 
""



$

r  out2c
                C   d   t | |||||||||	
}|
||f}t|D ]\}}|d ur/t|| |j t||| dd q|S r   )r  	enumerater   r   r   )rf   r   r   r  r  r  r  r  r  r  r  r  r  r   rU   r  rG   r(   r(   r)   native_group_norm_backward_out?  s   
r   c                 C   s   | d ur	|  |S | S r6   r8   )r1   r   r(   r(   r)   _maybe_cast\  s   
r  grad_outnormalized_shapebiasc           "         sf  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}ddl	m
} ||dksj||dkr|d rs||nd |d r|||d  nd |d r|||d  fS d fS t|| }t|| }|| | }|d ur|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d } d }!|d r|| | }|d r|d urt|dkrt|
| |d} n|
| } |d r"|d ur"t|dkrt|
|d}!n|
 }!t||jt| |jt|!|jfS )	Nc                 3   s,    | ]}|d ur|j  tjdn|V  qd S r  )r8   r@   r  r/   r:   r(   r)   rN  q  s    
z-native_layer_norm_backward.<locals>.<genexpr>r   r  r    r!   TF)r   rM   rC   get_computation_dtyper   r  rQ   r  r  r  r  r  rT   r@   r   r   r*  r  )"r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  Mr  Zx_hatZ
grad_x_hatabr  r  r  rI   r  Zd_weightr  r(   r:   r)   native_layer_norm_backwardc  sn   





r  c             	   C   s`   t | |||||||}||	|
f}t|D ]\}}|d ur-t|| |j t||| dd q|S r   )r  r  r   r   r   )r  r   r  r   r  r   r  r  r  r  r  r   rU   r  rG   r(   r(   r)   native_layer_norm_backward_out  s   
r  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r!   r   T)rM   Z
correctionr   r    )r   rz   r  r   )r  rQ   rM   rC   r  r   r8   r@   Zvar_meanrsqrtrq  copy_r   r   r  r'  r  r  rT   r  )r   r   r  r  r  r   r  r  r  Zreduction_dimsr;   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r=  	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr(   r(   r)   native_batch_norm_helper  st   





r  r  save_invstdc              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r  r=  r  r  rS   r(   r(   r)   native_batch_norm#  s   
r%  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)ry   _native_batch_norm_legitr  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r  r(   r(   r)   native_batch_norm_decomposition?  s&   r(  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr    r   c                    r  r(   r(   r   r  r(   r)   r3   j  rw   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   r@   opsry   r  r  r  r   )r  r  rM   r!  r  r(   r  r)   unsafe_chunk_py_impld  s   
r*  c              
   C   s   t j| ||||d||S r"  )ry   r&  r  )r   r   r  r  r  r  r  r(   r(   r)   r'  p  s   
r'  c              
   C   r!  r"  r#  r$  r(   r(   r)   r&    s   
r&  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r"  r#  )
r   r   r  r   r  r  r=  r  r  rS   r(   r(   r)   !_native_batch_norm_legit_no_stats  s   	
r+  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be Noner#  )r   r   r  r  r  r   r  r  r=  r  r  r  r  r(   r(   r)   #_native_batch_norm_legit_functional  s   r.  c           	   	   C   sP   t j| ||||d|}d}|t jjjkrt j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr'  )
r@   _CZ_select_batch_norm_backendZ_BatchNormBackendZCudnnZ(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r/  r'  )	r   r   r  r  r  r  r   backendZreserve_sizer(   r(   r)   _get_batch_norm_reserve_tensor  s   r4  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r4  r   r   r  r  r  r  r  r=  r  r  rS   reserver(   r(   r)   _batch_norm_with_update     
r9  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d us$J d|d us,J d|||	||
|fS )NTr5  r,  r-  r6  )r   r   r  r  r  r  r  r=  r  r  Znew_rmZnew_rvr8  r(   r(   r)   "_batch_norm_with_update_functional  s   r;  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr5  r6  r7  r(   r(   r)   _batch_norm_no_update  r:  r<  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   ra   )r@   r  r8   r2  r  )r   r   	generatorr3  r  r(   r(   r)   _fused_dropout_decomposition-  s   r>  )r   r/  r'  
pin_memorynon_blockingr   r'  r?  r@  r   c          	      C   s
  |r|t jksJ d|rJ dt| t jttttfsJ |d u r6|d u r6|d u r6t| t jr4|  S | S d}t| t jrA| }nt 	| }|d uri||j
kri|d ura|jdkrat j||}d}t j|||}|d urx|sxt j||}d}|d urt j||dS |S )NTODOFr  Tr   )r@   stridedr.   r   r#  r   r-  complexr*  scalar_tensorr'  r  _primsZconvert_element_typeZ
device_put)	r1   r   r/  r'  r?  r@  r   Zdtype_convertedZx_tensorr(   r(   r)   _to_copy7  s,   
rF  c                 C   s
   t | S r6   )ry   aliasr9   r(   r(   r)   nop_decompositione  s   
rH  Zout3exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )ry   r%  r  r@   r2  )r   r   r  r  r  r   rI  rJ  r  r  rf  r(   r(   r)   cudnn_batch_normm  s"   
rK  c                 C   s@   t |D ]\}}|dkr|| jk r| j| |ks| |} q| S rX   )r  r  r   rR   )r1   broadcast_maskr  r3  r(   r(   r)   _broadcast_batch_norm_backward  s
    
rM  r8  c                 C   s   t | |||||||||	
S r6   )native_batch_norm_backward)r  r   r   r  r  r  r   r  r  r  r8  r(   r(   r)   batch_norm_backward  s   rO  c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r6   r  r/   r:   r(   r)   rN    s
    
z-native_batch_norm_backward.<locals>.<genexpr>r!   z$rank of the input must be at least 2r    ra   )r   rC   r  r   rM   r  r  r@   r  rQ   r  rM  r   r   r8   r  )&r  r   r   r  r  r  r   r  r  r  r;  Zweight_dtyper	  r
  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  rL  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojrU   r  Z	grad_biasr(   r:   r)   rN    s   
	



rN  c
                C   r  r   )rN  r  r   r   r   )r  r   r   r  r  r  r   r  r  r  r  r  r  r   rU   r  rG   r(   r(   r)   native_batch_norm_backward_out	  s&   
rP  save_varc                 C       t || |||||d|g d
S NT)TTTry   rN  )r   rf   r   r  r  r  rQ  rJ  r(   r(   r)   miopen_batch_norm_backwardA	  s   rU  reserveSpacec	           	      C   rR  rS  rT  )	r   rf   r   r  r  r  rQ  rJ  rV  r(   r(   r)   cudnn_batch_norm_backward[	  s   rW  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd qd |d  dkrjd |d  dkrjtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )NrZ  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r(   r(   r  r(   r)   rv   	     
 z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                         dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r_  r(   ra  r(   r)   rv   	  s    rP   c                 s   s    | ]	\}}|| V  qd S r6   r(   )r0   r  r  r(   r(   r)   rN  	      z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS r    Nr(   )r0   r  r  r  r(   r(   r)   rN  	  s    
c                 S   s   t j| | |ddS )NtruncZrounding_moder@   divr  r  rf  r(   r(   r)   start_index	  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr    r]  r^  r_  ra  r(   r(   r)   	end_index	      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nr(  r    r   rP   rB  )r@   r+  rC  rR   rD  r   r'  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxr2  maxvali1length)r'  rc  rb  r(   r)   compute_idx	  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rP   rt   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rP   r   r   r   )r.   r   rR   rT   r@   r  )valsrl  ri  rh  rM   r3  r(   r(   r)   
maybe_mask	  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)rh  rM   r   )r'  r   r  r@   rx   r`  ro  nnr  Z
avg_pool2drT   r   r   rQ   )r   rl  r]  r  Zkernelrm  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wrn  ro  retr  jr(   )r'  rc  r  r   rb  r)   adaptive_avg_pool2dv	  sN   

(  



&rs  c           	      C   s   t d| d ttj| jd |  }ttj|}dg| j }| jd |  |d | < |tj|| j	d
||  d}| t| jd |  t| }tj|d|g| ddd
|jS )NZmax_unpoolingZd_forward_outr    r&  rP   Fr  )rC   Zalert_not_deterministicr   operatorr   r   r  ry   r+  r'  r/  r   r  r  r  )	rp   r1  rl  rM   ncZhwZindices_nc_shapeZindices_flatr=  r(   r(   r)   _max_unpoolnd	  s   	"rv  c                    s   t jt jkfdd t tdkfdd t jdv fdd t jjkfdd tdjD ] t  d	k fd
d q>t	dS )Nc                         d j  S )Nz2elements in indices should be type int64 but got: r   r(   )r1  r(   r)   rv    
      zmax_unpool2d.<locals>.<lambda>r!   c                      rZ  )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r(   rl  r(   r)   rv   
     rZ  c                         d j  dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r  r(   r   r(   r)   rv   
  s   c                         dj  d j  S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: ra  r(   )r1  rp   r(   r)   rv   
     
r    r   c                         dj  d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.ra  r(   )r  rp   r(   r)   rv   
  
   )
r@   rx   r   rC  r  r  r   rQ   r   rv  )rp   r1  rl  r(   )r  r1  rl  rp   r)   max_unpool2d	  s,   





	r  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ] t  dk fdd qXt d dko~d
 dko~d dkfdd t	dS )Nc                   S   rH  )Nz(elements in indices should be type int64r(   r(   r(   r(   r)   rv   0
  rI  zmax_unpool3d.<locals>.<lambda>r   rm  c                      r}  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r~  r  r(   r   r(   r)   rv   4
      r}   c                      rZ  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got ry  rz  r(   r{  r(   r)   rv   8
  r|  c                      rZ  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: ry  rz  r(   r  r(   r)   rv   ?
  rw   c                      rZ  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: ry  rz  r(   )rG  r(   r)   rv   C
  rw   c                      r  r  ra  r(   )r1  r   r(   r)   rv   G
  r  r    r   c                      r  )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got r  r  ra  r(   )r  r   r(   r)   rv   P
  r  r!   c                      rX  )Nz5strides should be greater than zero, but got stride: r(   r(   r  r(   r)   rv   Y
  rY  )
r@   rx   r   rC  r  r  r   rQ   r   rv  )r   r1  rl  r  rG  r(   )r  r1  r   rl  rG  r  r)   max_unpool3d&
  sB   	







	"
r  )rg   r  c                C      t | |||d|dS )NTinplacerg   
_index_addr1   rM   r4  r  rg   r(   r(   r)   
index_add__
  s   	r  c                C   r  )NFr  r  r  r(   r(   r)   	index_addk
  s   
r  r  c                   s"  t | jtjdkfdd jdkrdnd|jdkr*|ndtkfdd  dkr]t | jttkpQt 	t
  fdd |  }| jdk}|ri| dn| }d f }|rwtjntj}	|	|||dd	}
|r| S |r|
dS |
 S )
Nr    c                      r}  Nz(Index should have dimension 1 or 0 (got r   r  r(   r4  r(   r)   rv   
  r  z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r(   r(   )rM   
index_sizer  r(   r)   rv   
      c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r(   )rg   python_typer(   r)   rv   
  r  r6   Tr  )rC   canonicalize_dimsr  r@   rx   r   Zdtype_to_typer   r-  Zis_weakly_lesser_typer  rR   ry   
index_put_	index_putrq  r?  )r1   rM   r4  r  r  rg   zero_dimr   r2  r  r   r(   )rg   rM   r4  r  r  r  r)   r  x
  s6   	

r  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|r,||f}n||f}|| }| d ||}dt| }	t|D ]+}
| |
 }t||	d||d f |}|rhtj	||d|
d}qFtj	||d|
d}qF|S )	Nr   c                   S   rH  )Nz#received an empty list of sequencesr(   r(   r(   r(   r)   rv   
  rI  zpad_sequence.<locals>.<lambda>r    c                 s   s    | ]}| d V  qdS rL  r   r/   r(   r(   r)   rN  
      zpad_sequence.<locals>.<genexpr>)r   r   rM   r4  )
r@   rx   r  r   r   r   rQ   ry   r  r5  )	sequencesbatch_firstZpadding_valueZsequences_sizemax_sizeZtrailing_dimsmax_lenZout_dimsr   Zdim_paddingsr  Zcurrseqrowr(   r(   r)   pad_sequence
  s(   
r  c                 C      t | |||ddS )NTr  _index_copyr1   rM   r4  r  r(   r(   r)   index_copy_
     r  c                 C   r  )NFr  r  r  r(   r(   r)   
index_copy
  r   r  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| } jdkr, dn  d|  f }|r:tjntj}||||}	|rG| S |rN|		dS |	
 S )Nr    c                      r}  r  r  r(   r  r(   r)   rv   
  r  z_index_copy.<locals>.<lambda>r   r6   )rC   r  r  r@   rx   rR   ry   r  r  rq  r?  )
r1   rM   r4  r  r  r  r   r2  r  r   r(   r  r)   r  
  s   

r  c                 C   sR   t | d| }t t |  }| js| jr| d}n|}|t | |fS )Nr(   r  )r@   re  r  rb   r   r  Zis_xpur   )rp   r   rd   r   r(   r(   r)   log_sigmoid_forward
  s   r  lowhighr=  c                 C   s$   t j| jt|t|| j| j|dS )N)r  r  r   r'  r=  )primsZ_uniform_helperr   r   r   r'  )r1   r  r  r=  r(   r(   r)   uniform
  s   r  c                 C   s   |  t| |||S r6   )r  r  )rp   r  r  r=  r(   r(   r)   uniform_
  s   r  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr!   c                   S   rH  Nz9Must specify exactly one of output_size and scale_factorsr(   r(   r(   r(   r)   rv     rI  z.upsample_compute_output_size.<locals>.<lambda>c                   S   rH  N r(   r(   r(   r(   r)   rv     rI  c                   S   rH  r  r(   r(   r(   r(   r)   rv     rI  c                   S   rH  r  r(   r(   r(   r(   r)   rv     rI  Fc                   S   rH  r  r(   r(   r(   r(   r)   rv     rI  )r  r@   rx   r  r#  r  r   )r  rl  scale_factorsZspatial_dimensionsr  r  r(   r(   r)   upsample_compute_output_size  s.   r  c                 C   s   | d u rd S | | S r6   r(   )scalesr2  r(   r(   r)   get_scale_value  s   r  r  c                 C   s2   t |  ||}|r|nd gt| }t| ||S r6   r  r   r  _upsample_nearestr   rl  r  osizer  r(   r(   r)   _upsample_nearest_vec#  s   r  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactr  r  r(   r(   r)   _upsample_nearest_exact_vec8  s   r  c                 C   s   g }t |}|r
dnd}t|D ]I}|| }| j| |  }	|| d ur,|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}qL|
| q|S )Nr   r   rB  r    rP   )r  rQ   r   r@   r+  r   r'  r8   rC  rR   r  )r   rl  r  r  r1  Znum_spatial_dimsr7  r]  r  isizerh   Zoutput_indicesZinput_indicesrS   r(   r(   r)   !_compute_upsample_nearest_indicesM  s   $r  )Zpreserve_memory_formatr   r  c                 C   s   t | ||gS r6   r  r   rl  r  r(   r(   r)   upsample_nearest1dm  s   	r  c                 C   s   t | ||gddS r  r  r  r(   r(   r)   upsample_nearest_exact1dy     r  scales_hscales_wc                 C   s   t | |||gS r6   r  r   rl  r  r  r(   r(   r)   upsample_nearest2d  s   
r  c                 C   s   t | |||gddS r  r  r  r(   r(   r)   _upsample_nearest_exact2d  s   r  scales_dc                 C   s   t | ||||gS r6   r  r   rl  r  r  r  r(   r(   r)   upsample_nearest3d  r  r  c                 C   s   t | ||||gddS r  r  r  r(   r(   r)   _upsample_nearest_exact3d  s   r  r  c           	      C   sp   t | |||d}d d g| }t| |}|jdkr6t| }| jd }| jjdkr0|dk r0t	j
}|j|d}|S )Nr  r   r    cudar   )r  ry   _unsafe_indexr  rC   r   r   r'  r  r@   r  r?  )	r   rl  r  r  Zspatial_indicesr1  r   r   
n_channelsr(   r(   r)   r    s   


r  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )Nrm  r   r}   r!   r   c                    s    g | ]}t ||   qS r(   r_  r  Z
group_sizeparamsr(   r)   r3     s    z!gather_params.<locals>.<listcomp>)r  rQ   )r  
has_biaseshas_projectionsr(   r  r)   gather_params  s   r  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr!   r    NNr(   )r  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr(   r(   r)   params_hiddens  s   $r  c                 C   s2   ||ksJ | | d|||  | dd|S rl   )r  r   )r  last_batch_size
batch_sizer  r(   r(   r)   update_hidden_for_packed  s   r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS rl   )r@   concatr   )r  r  r  Z
inp_hiddenr(   r(   r)    update_hidden_for_packed_reverse  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r    r!   r}   rP   )
r   r@   r  r  r   r  r  r  reverser   )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr  r   
hidden_outr(   r(   r)   one_layer_rnn_data  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r6   r   linearr  r  r  r  r  r  nonlinearityr(   r)   rI   <  s   zrnn_cell.<locals>.innerr(   r  rI   r(   r  r)   rnn_cell;  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r6   r  r  r  r(   r)   rI   C  s   zrnn_cell_data.<locals>.innerr(   r  r(   r  r)   rnn_cell_dataB  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r    r!   r}   )	r   r  fliprR   r  r  r@   r   rq  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r  r   r(   r(   r)   one_layer_rnnJ  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r    r!   r}   F)
r@   r   r   rR   r?  r)  ry   Zmkldnn_rnn_layerr  rq  )r  r  r  r  r  w0w1w2w3hxcxr  modeZhidden_size
num_layersr  r  r  ZoutputsrW   hycyr(   r(   r)   mkldnn_one_layer_lstm`  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r    r   T)r  )r  )	transposerQ   r  r  r@   r   rM   r  )r   r  r  r  r
  r  r  r  r  layer_fnfinal_hiddensr  r  r  r  r  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr(   r(   r)   _rnn_helper  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r@   r   stackr   r  r  r  r
  r  r  r  r  r  r   r  r(   r(   r)   rnn_tanh_input     
r  c	                 C   r  r  )	r  r  r  r   r  r  r@   r  r  r  r(   r(   r)   rnn_relu_input  r  r  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r@   r  r  datar  r  r  r  r
  r  r  r  r  r   r  r(   r(   r)   rnn_relu_data  &   
r   c	                 C   r  r  )	r  r  r  r   r  r  r@   r   r  r  r(   r(   r)   rnn_tanh_data  r!  r"  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr   r   r    r!   r}   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r(   r(   r)   	lstm_cell?  s   r'  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r    r!   r}   rm  r   r&  )r  rR   r   r  r   r'  r  r  r@   r   rq  )r  r  r  r  r  r  r  r  r  r%  r  r  r  r  r   r(   r(   r)   one_layer_lstmM  s$   *r)  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r    r!   r}   rm  r   rP   r(  )r  r@   r  r  r   r   r   r  r  r  r'  r  ro  r   )r  r  r  r  r  r  r  r  r  r  r%  r  r  r  r  Zorig_hxZorig_cxr  r  r  r  Zhidden0Zhidden1r   r(   r(   r)   one_layer_lstm_datah  s\   *

r*  c                 C   s   dd }|| ||rt S tS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkr$dS | }|t dkr1dS dd |D }|D ]}|t j	t j
fvrG dS q:| jrMdS |d d|d dk}|r_dS d	S )
NFc                 S      h | ]}|j qS r(   r&  r0   tr(   r(   r)   	<setcomp>      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r    r  c                 S   r+  r(   r   r,  r(   r(   r)   r.    r/  r   r!   T)r@   r0  Z_get_mkldnn_enabledr  r   from_iterabler  popr'  r   bfloat16requires_gradr   )	r   r  r  r  Zdevicesr'  Zdtypesr   r  r(   r(   r)   
use_mkldnn  s(   
z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r)  )r   r  r  r4  r(   r(   r)   select_one_layer_lstm_function  s   r5  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr!   lstm expects two hidden statesr   r    )	r  r  r   r  ro  r5  r  r@   r  )r   r  r  r  r
  r  r  r  r  r  r  r   r  r(   r(   r)   	lstm_impl  s$   $"r7  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr!   r6  r   r    F)r  )
r  r  r   r  ro  r  r   r*  r@   r  r  r(   r(   r)   lstm_data_impl  s"   $
"r8  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr}   r    r!   r   )r$  r   r  r   r   r  r  r  r  r  r  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater(   r(   r)   gru_cell  s   r:  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr}   r    r   r!   r#  r9  r(   r(   r)   gru_cell_data&  s   r;  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r;  r@   r  )r  r  r  r  r  r
  r  r  r  r   r  r(   r(   r)   gru_impl_data/  s   r<  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r:  r@   r  )r   r  r  r  r
  r  r  r  r  r   r  r(   r(   r)   gru_implM  s   
r=  c                 C   :   t |  ||}t|d}t|d}tjj| ||||S Nr   r    )r  r   r  r@   r)  ry   _upsample_bilinear2d_aar   rl  align_cornersr  r  scale_hscale_wr(   r(   r)   upsample_bilinear2d_aa_veck     


rE  c                 C   r>  r?  )r  r   r  r@   r)  ry   _upsample_bicubic2d_aarA  r(   r(   r)   upsample_bicubic2d_aa_vecw  rF  rH  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r6   )r  r   r  _upsample_linear)r   rl  rB  r  r  r  r(   r(   r)   _upsample_linear_vec  s   	rJ  rB  c                 C   s   t | |||gS r6   rI  )r   rl  rB  r  r(   r(   r)   upsample_linear1d  s   rL  c                 C   s   t | ||||gS r6   rK  )r   rl  rB  r  r  r(   r(   r)   upsample_bilinear2d  s   rM  c                 C   s   t | |||||gS r6   rK  )r   rl  rB  r  r  r  r(   r(   r)   upsample_trilinear3d  s   rN  c                 C   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr    ra   r   r(   )rf  rg  rB  rh   r(   r(   r)   _compute_scale  s    rO  c                 C   s   |r| | S | |d  d S Nr   r(   )rh   Z	dst_indexrB  r(   r(   r)   _compute_source_index  s   rQ  weightsweights_precisionc                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s,    | ]\}}| tj| tj V  qd S r6   )r8   r@   r  )r0   r  rf  r(   r(   r)   rN    s    
z%_sum_tensors_uint8.<locals>.<genexpr>r    r      )_sum_tensorsro  r@   r   r8   r2  )r%  rR  rS  r=  r(   r(   r)   _sum_tensors_uint8  s   
rV  c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   r&  r   r    i   )r@   r  r   r+  r'  r   )rR  Z
max_weightZmax_weight_precisionZ
precisionsvaluesr3  r(   r(   r)   _compute_weight_precision  s   rY  c                    s  j d }j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}g }	t	ddgg  D ]# d d g fd	dt
D  }
t|
}t|}|	| qGtt
D ]'}|| |  d
dfddt|	d d d |	dd d D }	qqt|	dksJ |	d }t}jjdkr|dk rtj}t|tjsJ |j|d} s| }|S )Nr    r!   r  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd gdg| R  }|tj	}|d j| d d}|||fS )Nr&  r   r   r~   r   r    r   )
rO  r@   r+  r'  r8   rQ  r   r   r   rC  )	inp_sizerg  r  ZnsqueezeZscale_factorr  Zx_f32r1   Zxp1)rB  r   r   r(   r)   
get_values  s   
z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]\}\}}} |||d  | qS r   r(   )r0   r  rZ  rg  r  )r[  n_dimsr(   r)   r3     s    z$_upsample_linear.<locals>.<listcomp>r   c                    s(   g | ]} | d kr| n| qS r  r(   )r0   k)r  xp1sxsr(   r)   r3        ( r   ra   c                    s$   g | ]\}}|t ||   qS r(   )r@   r   )r0   Zv1Zv2)xscaler(   r)   r3     s    r     r   )r   r  rC   rD   r  INT_TO_FLOATr  ro  r  r   rQ   ry   r  r   r  reversedr   r8   r   r'  r  r@   r  r.   r   r?  r  round)r   rl  rB  r  r  Z	inp_sizesrS   rX  Zxs_f32vsr2  vr  r   r   r(   )	r  rB  r   r[  r   r\  r^  r_  ra  r)   rI    sF   


"


rI  r  r  c                 C   s   | j |j kS r6   ra  )r  r  r(   r(   r)   is_same_size'  rs   rh  c                 G   rn   r6   )ry   r/  )r1   r   rE   r(   r(   r)   _reshape_alias,  s   ri  c                 C   rn   r6   )ry   r4  )r1   r1  r(   r(   r)   r  2  rs   r  c                 C   s   t | |||S r6   )ry   r  )r1   r1  rq   r  r(   r(   r)   r  7  r  r  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  ddlm} ||  dkr@t j	
| |}| |j|S tt|D ]}|| }|d ur^|jd| |d d||< qFt| || |S )Nc                   S   rH  Nz3tensors used as indices must be long or int tensorsr(   r(   r(   r(   r)   rv   B  rI  z&_unsafe_masked_index.<locals>.<lambda>c                   S   rH  Nz*tensors used as masks must be bool tensorsr(   r(   r(   r(   r)   rv   G  rI  r   r  r    r  )r@   rx   r   r  r#  r-  r  r  r   Z_meta_registrationsZmeta_index_Tensorr   r   rQ   r  r   r   ry   r  r  )r1   r3  r1  fillr4  r  Zmeta_resultr  r(   r(   r)   r0  <  s*   
r0  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  |  dkr.|  S tt	|D ]}|| }|d urP|j
| | | |d d||< q4|| d}tj| ||ddS )	Nc                   S   rH  rj  r(   r(   r(   r(   r)   rv   ^  rI  z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>c                   S   rH  rk  r(   r(   r(   r(   r)   rv   c  rI  r   r    r  Tr  )r@   rx   r   r  r#  r-  r   r*  rQ   r  r   r   r  ry   r  )r1   r3  r1  rX  r4  r  Zmasked_valuer(   r(   r)   #_unsafe_masked_index_put_accumulateX  s(   
$rm  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr    r!   r   r(   r   )rM   r   r/  r@   rc   rR   gatherrq  r   r%   rq   r   r)  r   r8   r'   r&   )rp   r   r   r   r   r\  r   r   wr   Zsafe_target_r   r   Zwsumr(   r(   r)   _nll_loss_forwardr  sB   


rp  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r!   r   r    r   r   r   r   rP   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rM   r   r   rp  )rp   r   r   r   r   r   Z	n_classesr(   r(   r)   nll_loss_forward  s    	
"rq  c                 C   s   t | ||||S r6   )rp  )rp   r   r   r   r   r(   r(   r)   nll_loss2d_forward  s   	rr  Ac                 C   s    |d |  |d  |  |  d S )Nr!   r}   r    r(   r1   rs  r(   r(   r)   _upsample_cubic_convolution1  rd  ru  c                 C   s(   ||  d|  |  d|  |  d|  S )Nrm     r   r(   rt  r(   r(   r)   _upsample_cubic_convolution2  s   (rw  r-  c           
      C   s   d}| j t dkrDtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS )Ng      r  ra   r   rt   r   )r'  r@   r  rw  ru  r  )
r-  rs  Ztt1Ztt2Zw03Zw12r  r  r  r  r(   r(   r)    _upsample_get_cubic_coefficients  s   

rx  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s       | ]	\}}|| V  qd S r6   r(   r0   r  r  r(   r(   r)   rN    r[  z+_upsample_cubic_interp1d.<locals>.<genexpr>)rx  rU  ro  )ry  rz  Zcoeffs2r(   r(   r)   _upsample_cubic_interp1d  s   r}  c                 C   s   t tj| S r6   )r   r@   add)rz  r(   r(   r)   rU    s   rU  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr    r   r(  )Zstepsr'  r   )r@   r  Zlinspace)r  rB  r   r'  r  r(   r(   r)   _linspace_from_neg_one  s   r  thetahro  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr    )r    r    r    rB  )r   r!   constantr   rd  r	  rq   r    r    )r!   r   	r   r'  r  r/  r@   r,  rp  r  rd  )	r  r  ro  rB  r   r'  grid_xgrid_ygrid_oner(   r(   r)   _make_base_grid_4d  s   r  r]  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr    )r    r    r    r    rB  )r   r}   r  r   r  r|  r!   r    )r}   r   r  )r  r]  r  ro  rB  r   r'  r  r  Zgrid_zr  r(   r(   r)   _make_base_grid_5d  s   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )NrB  rP   r}   r    r   r!   )r  r/  r  rR   r   )	r  r   rB  r  rS   r  ro  	base_gridgridr(   r(   r)   _affine_grid_generator_4d$  s    r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rP   r   r    r   r}   )r  r/  r  rR   r   )
r  r   rB  r  rS   r]  r  ro  r  r  r(   r(   r)   _affine_grid_generator_5d.  s    r  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )Nr  c                   S   rH  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r(   r(   r(   r(   r)   rv   >  rI  z'affine_grid_generator.<locals>.<lambda>r   r  )r@   rx   r  r  r  )r  r   rB  r(   r(   r)   affine_grid_generator8  s   
r  r  interpolation_modepadding_mode_expand_gridc                    sJ  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\ |j\}}|dkscJ ru|d| d}dtdtdtffddt jjddddt j jdd dddtdtdtdt	f fdddtdtdtffdd
|d  }	|d! }
d"kr1||	}||
}|
 |
 d }}d }}||}}|| ||  }|| ||  }|| ||  }| |  }t
fd#d$|f|||f|||f|||ffD S dkrN||	}||
}| }| }
||dS |	}|
}|
 |
 | | }sud|d}dtdtdtf
fd%d&d'tdtffd(d)	t	fd*d$td+D }t||S ),N)r   r    r!   c                      rX  )NzInvalid interpolation mode r(   r(   )r  r(   r)   rv   W  rY  z"_grid_sampler_2d.<locals>.<lambda>c                      rX  )NzInvalid padding mode r(   r(   )r  r(   r)   rv   Z  rY  coordsr   rN   c                    s0    r|d d n|d }|d d }| | | S rP  r(   )r  r   r   ofsr  r(   r)   unnormalize]  s   z%_grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr!   r   r    r   )r@   r   r   fmodfloorr8   Zint8rc   )r  r  r  Z
coords_minZcoords_spanZcoords2extraZflipsr(   r(   r)   reflect_coordinatesh  s   
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r    r!   rP   r   )r  r   Zcoords_reflected)rB  r  r  r(   r)   compute_coordinatest  s   z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r6   r(   )r  r   Z	coords_un)r  r  r(   r)   compute_source_index  s   

z._grid_sampler_2d.<locals>.compute_source_indexr!   r    r_  ysc                    s,   t d| kt | k t d|k| k S rl   r@   r.  )r_  r  )iHiWr(   r)   in_bounds_cond  s   $z(_grid_sampler_2d.<locals>.in_bounds_condr&  wsc                    sN   | |r	nd t  fdd| jtjd|jtjd|fD S )Nr    c                 3   s*    | ]}t |d  V  qdS rL  )r@   rc   r/  r,  )r  rf  rV  oHoWr(   r)   rN    s
    
z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r`  r8   r@   rC  )r_  r  r  )r  r  r  r  r  r  )rf  rV  r)   clip  s
   
z_grid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r6   r(   )r  r  ro  Zidx_xZidx_yZw_)C_idxN_idxr  r  r(   r)   get_summand  s   z%_grid_sampler_2d.<locals>.get_summand).r   ).r    r   c                 3   s"    | ]\}}} |||V  qd S r6   r(   )r0   r  r  ro  )r  r(   r)   rN    s
    

z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rX   r(   )r  r  r1   rW   )r  r  r  r  r(   r)   get_value_bounded  s   

z+_grid_sampler_2d.<locals>.get_value_boundedr  c                    sF   | d  } d | | d | d |f}t |S )Nr    r!   )r}  )r  Ziy_ofscs)r  ix_nwiy_nwtxr(   r)   	get_coeff  s   
z#_grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r6   r(   )r0   r  )r  r(   r)   rN    rO  r   )r@   rx   r   r#  r   r/  r)  r+  r'  r   r  rU  re  rR   r`  rQ   r}  )r  r  r  r  rB  r  r  rS   twor1   rW   r  r  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyry  r(   )r  r  r  r  r  r  rB  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r)   _grid_sampler_2dF  sx   
 ( 




	





 

r  c                 C   s   t | ||||dS )N)r  r  r  rB  )r  )r  r  r  r  rB  r(   r(   r)   grid_sampler_2d  s   
r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr!   r    c                      s   d    d   S )Nzmatrix @ vector expected, got r  rt   r(   rp   r  r(   r)   rv     r  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r1   r    z), vec (r   r  r(   r  r(   r)   rv   	  s   * rt   )r@   rx   rM   r   r   r  r(   r  r)   r    s   r  c                 C   sd   |d ur|d | d }d| |  |t |   }nd| |  t |  }|d ur-|| }t||S rX   )r   Z
logsigmoidr   )rp   r   r   Z
pos_weightr   Z
log_weightr   r(   r(   r)    binary_cross_entropy_with_logits  s   
r  tensor1tensor2is_outc           	         s   | j |j kr
| |fn|| f\}}ddlm  |j dkr |j dks"dS |jr)|s)dS | j dkr0dS  | dkr:dS |j}| }dg}t|dd  D ]}|||d   qLt	 fd	d
t
|tt||D S )Nr   r  r}   r!   FTr    rP   c                 3   s*    | ]\}}} |d kp||kV  qdS r\  r(   )r0   r   r   r   r  r(   r)   rN  7  s
    
zshould_fold.<locals>.<genexpr>)r  r  r  r3  r   r   r  rd  r  r   ro  r  )	r  r  r  t1t2Zt1_shapeZ	t1_strideZexpected_strider   r(   r  r)   should_fold  s(    

r  )Zpass_is_out)r  c                C   sx  |   }|  }|dkr|dksJ |dkr |dkr t| |S |dkr.|dkr.t| |S |dkrD|dkrDttt| d|dS |dkrR|dkrRt| |S t| ||r||k}|ra|jn| }|sg|n	|dkro| 	 n| }|j
}t|d d }	ttj|	}
|  dk}|r|	|j
d  ||
|d }|rtjj|||	}|r|j S |S tjj|||	S |dkr|dkr|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]
}||| q|dkrA|dkrA|d |d krA|d dkr.| jr.t| d|S |d dkrA|jrAt| |dS tt||}|||g }t|}| ||||}|dk}|rv||g }||||d}n|||g }|||||}|}	|dkr|	| |dkr|	| |r||d|	S |||	S tddd	  d S )
Nr   r    r!   rP   r   r}   Fc                   S   rH  )Nz/both arguments to matmul need to be at least 1Dr(   r(   r(   r(   r)   rv     rI  zmatmul.<locals>.<lambda>)rM   r@   dotr  rq  r  rR   r  r  r-  r   r  r   rt  r   r  r   r)  ry   _unsafe_viewr?  r   rQ   r3  r  Zbroadcast_shapesr  r)  bmmr/  rx   )r  r  r  Zdim_tensor1Zdim_tensor2r  r  r  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr=  r  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr(   r(   r)   r  ?  s   	








r  rC  rD  c                    s  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrtt|fddD fdd|D }fddfdd t fdd|D }jtjkrd usJ t||}ntdd t||D }t}|j|d}|S )Nr   r    r  r&  r   rP   r   ra   r!   r  c                    .   g | ]}|d  >  t |d  t jqS r    r   r@   r   r8   Zint16r0   ro  )weights_precision_xr(   r)   r3          z.upsample_bicubic2d_default.<locals>.<listcomp>c                    r  r  r  r  )weights_precision_yr(   r)   r3     r  c                    s<   t | d d }t |dd }td d ||g}|S r?  )r@   r   ry   r  )r  r_  Zy_idxZx_idxrg  )in_hin_wr   r(   r)   load_bounded  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    sT   t  fddD }jtjkrd usJ t|S tdd t|D S )Nc                 3   s    | ]} |V  qd S r6   r(   )r0   Zx_ofs)r  rW   r(   r)   rN    r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>c                 s   r{  r6   r(   r|  r(   r(   r)   rN    r[  )r`  r   r@   r2  rV  rU  ro  )rW   Zsrc_x)r   ixs_ofsr  r  	weights_x)rW   r)   get_x_interp  s
   z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r  r6   r(   )r0   Zy_ofs)r  r(   r)   rN    rO  z-upsample_bicubic2d_default.<locals>.<genexpr>c                 s   r{  r6   r(   r|  r(   r(   r)   rN    r[  r   )r   rO  rC   rD   r  rc  r@   r+  r'  r8   rQ  rR   r  r   rC  rx  r   r2  rY  r`  rV  rU  ro  r   r?  )r   rl  rB  rC  rD  rS   Zh_scale_factorZw_scale_factorr   r  rr  Zx_floatZy_floatr1   rW   Zyscalera  Ziys_ofsZ	weights_yZsrc_yr   r   r(   )	r  r  r  r   r  r  r  r  r  r)   upsample_bicubic2d_default  sR   




r  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t| ||||S )Nr    c                   S   rH  )Nz:Must specify exactly one of output_size and scale_factors.r(   r(   r(   r(   r)   rv     rI  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r6   )r   r   )r0   ro  rh   r(   r(   r)   rN  #  s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r!   r  )	r@   rx   r-  r
   r`  r#  ro  r   r  )r  rl  rB  r  rC  rD  r(   r(   r)   upsample_bicubic2d_vec  s   
r  c                        fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nr&  r    )r@   r+  r'  r   r   middler   Zdim_idxr  r(   r)   r2  2  s   z_reflection_pad.<locals>.idx_reflection_or_replication_padr  rG  r2  r(   r  r)   _reflection_pad,     r  c                    r  )Nc                    s*   t j|  ||  jd}t |d|d S )Nr&  r   r    )r@   r+  r'  r   r  r  r(   r)   r2  C  s   z_replication_pad.<locals>.idxr  r  r(   r  r)   _replication_pad=  r  r  idx_fnc                    s   t d  t|   d  d fv  fdd | j  d  }|    } fddt D } fddt D }| }t D ]}d g|  }	||| || || |	|| < t||	}qFt	|}
|j
|
d}|S )	Nr!   r    c                      s    d  d d  d d  dS )NZreflection_padzd requires r    zD or r!   zD inputr(   r(   rt   r(   r)   rv   V       z0_reflection_or_replication_pad.<locals>.<lambda>c                        g | ]}d  d |   qS r  r(   r  rM   rG  r(   r)   r3   [  r  z2_reflection_or_replication_pad.<locals>.<listcomp>c                    $   g | ]}d  d |  d  qS r  r(   r  r  r(   r)   r3   \  r  r   )r  r@   rx   rM   r   rQ   ry   r  rC   r   r?  )r  rG  r  Z	inp_shapeZnc_dimpadding_leftpadding_rightr   r  r2  r   r(   r  r)   r  N  s"   
 
r  c                    s\  t d dd |j d  D fddtD fddtD g }t|jD ]}dg|j }d||< |tj|j| |jd| q2|d    | d  
d	d
 
fddtD 
fddtD }
fddtD }fddtD 	t	
tj	fddtD }t|  d}	 fdd}
tjdd tD  D ]f}|tdg krqg }g }tD ]K}|| dkr| }	| }n0|| dkr|| }
| d| f}n|| dkr|| }
| | |  | d f}|| || q|
|	||}	q|	S )Nr!   c                 S   s   g | ]}|d  qS r   r(   )r0   r  r(   r(   r)   r3   q  r  z,_reflection_pad_backward.<locals>.<listcomp>c                    r  r  r(   r  r  r(   r)   r3   s  r  c                    r  r  r(   r  r  r(   r)   r3   t  r  r    rP   r&  c                 S   s   | \}}}t ||k||kS r6   r  )index_ranger  ZlbZubr(   r(   r)   index_range_condition  s   
z7_reflection_pad_backward.<locals>.index_range_conditionc                    s   g | ]
}|  |  qS r(   r(   r  r  xyzr(   r)   r3     r  c                    s   g | ]
} | |  qS r(   r(   r  r  r(   r)   r3     r  c                    s(   g | ]}d  |  |  |  qS r  r(   r  )dhwr  r  r(   r)   r3     r`  c                    s.   g | ]} | d | |  |  fqS r  r(   r  )centerr  r  r  r(   r)   r3     s    "c                    s   g | ]} | qS r(   r(   r  )r  range_cr(   r)   r3     r  r   c                    st   t D ]}|| d || d k }t|tr|r|   S qttjfdd|D }t| | d}| | S )Nr!   r    c                    s   g | ]} |qS r(   r(   )r0   r  )r  r(   r)   r3     r  z@_reflection_pad_backward.<locals>.accumulate.<locals>.<listcomp>r   )rQ   r.   r-  rJ   r   ry   r.  r0  )r   r   index_rangesr  Zupper_less_than_lowerrV  g)r  rM   rf   r  r(   r)   r    s   z,_reflection_pad_backward.<locals>.accumulatec                 S   s   g | ]}g d qS ))rP   r   r    r(   r   r(   r(   r)   r3     r  r   )r  r   rQ   r  r  r@   r+  r'  r/  rJ   r   ry   r.  r0  	itertoolsr   r`  )rf   r1   rG  r1  r  Z
view_shapeZleft_reflectZright_reflectrV  r   r  ZareaZoutsr  r   r  r(   )r  r  r  rM   rf   r  rG  r  r  r  r  r)   _reflection_pad_backwardj  sT   $
"
r  r   r   r   c                C   s(   t j| ||d}t j| ||d}||fS )Nr   )r@   aminr  )rp   rM   r   r  r  r(   r(   r)   aminmax  s   r  r   c                C   s"   t jtt| d| |||dS )Nr   r   )ry   r   r@   rc   isnan)rp   rM   r   r   r(   r(   r)   nansum  s   "r  r   r/  r'  r?  r/  c             	   C   s   t jjd| d||||dS )Nr   r    r  ry   r+  Z
start_step)r	  r   r/  r'  r?  r(   r(   r)   arange_default     
r  c             	   C   s   t jj| |d||||dS )Nr    r  r  )r  r	  r   r/  r'  r?  r(   r(   r)   arange_start  r  r  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rE   rF   r  r(   r(   r)   out_dtype_decomp  s   r  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r    r!   c                   S   rH  )Nz only p == 1 and p == 2 supportedr(   r(   r(   r(   r)   rv     rI  z#multi_margin_loss.<locals>.<lambda>c                      rw  NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: ra  r(   r  r(   r)   rv   
  rx  c                         d  dj  S )Nz#inconsistent target size, expected r  ra  r(   )nframer   r(   r)   rv     r/  c                      r   )Nz#inconsistent weight size, expected r  ra  r(   )rM   r   r(   r)   rv     r/  r  r&  rt   )r@   
atleast_2dZ
atleast_1dr   rx   r  r   rR   rn  r  r+  r'  rc   r   r&   rq   r   r'   r   )	r   r   r   r  r   r   urd   r2  r(   )rM   r   r  r   r   r)   multi_margin_loss  sB   







r  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr    r!   r   c                      rX  r  r(   r(   )orig_input_shaper(   r)   rv   6  rY  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r(   r(   r  Zorig_target_shaper(   r)   rv   :  rw   r&  rP   Tr   r  rt   ra   )r   rP   )r   r@   r  rx   r  r+  r'  r  rc   rn  anyrR   Tr  r   r&   rq   r   r   r'   r8   r   r   )r   r   r   rM   r2  Zis_endZend_idxZtarget_maskZtidx0r  Ztidx1r  rd   r(   r  r)   multilabel_margin_loss_forward'  s@   





r
  )	attn_maskrh   querykey	dropout_p	is_causalr  c          	   
      s   t t fdd t  dko  dko  dkfdd t  dk fdd t jd jd koJjd jd kdd  tjj| |d |d	\}}|d
dddj	t j
ddd
dd}||fS )Nc                      rw  )Nz-query must be FP32, FP64, BF16, FP16 but got r   r(   )r  r(   r)   rv   s  rx  z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r   c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got r  rt   r(   )r  r  rq   r(   r)   rv   w  s   " r   c                      rX  )Nz&dropout probability must be zero, got r(   r(   )r  r(   r)   rv   z  rY  r}   c                   S   rH  )Nz&q, k, v should have the same head sizer(   r(   r(   r(   r)   rv   ~  rI  )r  r  r  Zdropout_maskrh   r!   r   r    r   )r@   rx   r  rM   r   ry   Z"_scaled_dot_product_attention_mathr  rp  r?  r  )	r  r  rq   r  r  r  rh   r=  Zattnr(   )r  r  r  rq   r)   *scaled_dot_product_flash_attention_for_cpuf  s>   
"&
"r  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S rl   )r  )rE   rF   r   outplace_opr(   r)   
inplace_op  s   z$register_inplace.<locals>.inplace_opr   )Zaten_opr  r  r(   r  r)   register_inplace  s   r  c                 C   sx   |   s|  st|}t|}t||}t|tjr |dkr$|| }|dkr*|S t|tjr4|dkr8| | } | | S )Nr    r   )r  r  r#  r@   r  r.   numbersNumber)rp   Zbatch1Zbatch2r^   rg   r   r(   r(   r)   baddbmm  s   r  c                 C   s   t j| |ddS )Nr  r^  r_  )rp   r   r(   r(   r)   floor_divide  s   r  c                 C   s   t tj| jdS rX   )rJ   r   rt  r   r   )r-  r(   r(   r)   	sym_numel  r  r  r   r   c                C   s.   |d u rt jj| g |dS t jj| g ||dS )Nr   r  )ry   r   Zdim_IntListZIntList_out)rp   r   r   r(   r(   r)   sum_default  s   r  c                 C   sB   t | tjs| S |d u rtj| tt|  S tj| |gS r6   )	r.   r@   r   ry   rq  dimsr  rQ   rM   )rp   rM   r(   r(   r)   squeeze_default  s
   r  c                    s`   t  fddtt| jD }|jtjkrtjnd }| jd|d|d}| ||	|j  |fS )Nc                 3   s    | ]	}| kr|V  qd S r6   r(   r  rt   r(   r)   rN    r[  z)_weight_norm_interface.<locals>.<genexpr>r!   T)r   r   )
r`  rQ   r  r   r   r@   r2  r   r   r8   )rg  r  rM   Zkeep_dimZ
norm_dtyper   r(   rt   r)   _weight_norm_interface  s    r  assume_uniqueinvertc                C   s|   t | tjstj| |jd} t |tjs"|rt| |S t| |S | dt|  d k r6t	| ||dS t
| |||dS )Nr&  g      $@g(\?r!  r  )r.   r@   r   r  r'  ner   r   r   isin_defaultisin_sorting)elementstest_elementsr   r!  r(   r(   r)   isin  s   r(  )r=  c                C   sP   |d u rt j|  t j| jd}nt j|  |t j| jd}|| k | j}|S )NrB  )r=  r   r'  )r@   Zrandr   r   r'  r8   r   )rp   r=  Zraw_pr   r(   r(   r)   	bernoulli  s   r)  r"  c                C   sl   |   dkrtj| tjdS | jd|j  }| |}ttd|j d d}||kj	|d}|r4| S |S )Nr   r   r   rP   r    rt   )
r   r@   
empty_liker-  r   r  r/  r`  rQ   r  )r&  r'  r!  Zexpanded_elem_shaper1   rM   r  r(   r(   r)   r$    s   
r$  c                C   s   |   }|  }|rIt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|r5|	 }	t|	}
|
d||	}
|
d|   S t|\}}t	||}t
|| k |d}|| |k}|rm| n|}|| jS )NT)Zstabler    rP   r   F)r  r@   r   sortr  Zlogical_notr*  r  r   Zsearchsortedrc   r   r   )r&  r'  r   r!  Zelements_flatZtest_elements_flatZall_elementsZsorted_elementsZsorted_orderZduplicate_maskr3  Zsorted_test_elementsrS   r2  Ztest_idxcmpr(   r(   r)   r%  #  s$   
r%  c                 C   s   |  d}|| S rO   )r   )rp   r4  Z	flattenedr(   r(   r)   take@  s   
r-  c                 C   s2   |d u rt j}|t jkrt|}tj| |j|dS r  )r@   r  Zpreserve_formatr   ry   resizer   )rp   r   r   r(   r(   r)   	resize_asG  s
   
r/  )FF)r   r6   r  )r   NNr    )rP   FFrL  r  r  )r    r    F)Fr   )r   ra   N)r   r    N)Fr  )NNN)r   r   FT)r   r   Fr"  )r   F(  rJ   r  r  rt  r  collections.abcr   enumr   r   r   r   r   typingr   r	   r
   r   r   r@   Ztorch._meta_registrationsZtorch._primsrE  r  Ztorch._prims_commonZ_prims_commonrC   Ztorch.nn.functionalrp  r  r   r   r   r   Ztorch._decompr   r  r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Ztorch.utilsr   rB   Ztorch.utils._pytreer   r0  ZDispatchKeyr   r  str__annotations__Z_opsr)  ry   r   r  r-  rL   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZ"pw_cast_for_opmath_non_tensor_argsrc  Zpw_cast_for_int_to_realr#  rT   r[   r]   re   r   rm   rl  ZScalarrr   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r`  r   r   r   r   r   r   r&   rq   r   Z_safe_softmaxr   r   r   r  rU   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  slicer  r$  r  r6  r:  r<  r@  rA  rD  r{  r  r  r  r  r  Zpy_implZCompositeImplicitAutogradZAutogradr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r%  r(  Zunsafe_chunkr*  r'  r&  Zno_statsr+  r.  r4  r9  r;  r<  Z_fused_dropoutr>  rF  r'  r   r  ZliftZ
lift_freshrH  rK  rM  rO  rN  rP  rU  rW  Z_adaptive_avg_pool2drs  rv  r  r  r  r  r  r  r  r  r  r  r  	Generatorr  r  r  r  r  r  r  r  Z_upsample_nearest_exact1dr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r   r"  r'  r)  r*  r5  Zlstmr7  r8  r:  r;  Zgrur<  r=  r@  rE  rG  rH  rM  rN  rL  rJ  rO  rQ  rV  rY  rI  rh  ri  r  r  r  r0  rm  rp  rq  rr  ru  rw  rx  r}  rU  r  r  r  r  r  r  r  r  r  r  r  r  Zupsample_bicubic2dr  r  Zreflection_pad1dZreflection_pad2dZreflection_pad3dr  Zreplication_pad1dZreplication_pad2dZreplication_pad3dr  r  Zreflection_pad1d_backwardZreflection_pad2d_backwardZreflection_pad3d_backwardr  r  r  r+  rB  r/  r  r  r  r  r  r
  Z+_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  rq  rM   r  r  r(  r)  r$  r%  r-  r/  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_Zfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__r  r  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_Z
leaky_reluZlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_re  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r(   r(   r(   r)   <module>   s  


* 
 

 
	




  *!	
9

'"
	P`
 
	
%


(


(
 00
	

W	

	
R
	
R		#

	

	



,
	

	

h	
%	$f-7
("$$





 




  		

.2
)


  @
2
					

	
I"

5


 (
.$$


* 
'


* 
w
S


0


0




W


,

<

	C	

"
	


