o
    hA                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ ddlmZ ddlmZ d	d
lmZmZmZ d	dlmZ d	dlmZmZ g dZG dd de
jZG dd de
jZG dd de
jZG dd de
j Z!G dd de
j Z"G dd de
jZ#G dd de
jZ$G dd de
j Z%de&ee!e"f  dee&eeeef   d e'e( d!ed"e
j f d#ee d$e)d%ed&e%fd'd(Z*d)ed*d+d,Z+G d-d. d.eZ,G d/d0 d0eZ-G d1d2 d2eZ.e ed3e,j/fd4dd5d6d#ee, d$e)d%ed&e%fd7d8Z0e ed3e-j/fd4dd5d6d#ee- d$e)d%ed&e%fd9d:Z1e ed3e.j/fd4dd5d6d#ee. d$e)d%ed&e%fd;d<Z2d	d=lm3Z3 e3e,j/j4e-j/j4e.j/j4d>Z5dS )?    )Sequence)partial)AnyCallableOptionalUnionN)Tensor   )VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)VideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       ^   e Zd Z	ddededee dededdf fd	d
Zededeeeef fddZ  Z	S )Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r	   r	   r	   FZin_channelsZout_channelskernel_sizer    r!   biassuper__init__selfr   r   r   r    r!   	__class__ e/home/www/facesmatcher.com/frenv_anti/lib/python3.10/site-packages/torchvision/models/video/resnet.pyr(      s   
zConv3DSimple.__init__c                 C   
   | | | fS Nr-   r    r-   r-   r.   get_downsample_stride(      
z"Conv3DSimple.get_downsample_strideNr   r   
__name__
__module____qualname__intr   r(   staticmethodtupler2   __classcell__r-   r-   r+   r.   r      "    &r   c                       sX   e Zd Zddedededededdf fd	d
Zededeeeef fddZ  ZS )Conv2Plus1Dr   r   r   r   r    r!   r"   Nc                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r	   r	   r   r   Fr$   r    r!   r%   TZinplacer	   r   r   r'   r(   nnConv3dBatchNorm3dReLUr)   r+   r-   r.   r(   .   s   
zConv2Plus1D.__init__c                 C   r/   r0   r-   r1   r-   r-   r.   r2   ?   r3   z!Conv2Plus1D.get_downsample_strider   r   )	r6   r7   r8   r9   r(   r:   r;   r2   r<   r-   r-   r+   r.   r>   -   s    (&r>   c                       r   )Conv3DNoTemporalNr   r   r   r   r    r!   r"   c                    s(   t  j||dd||fd||fdd d S )Nr?   r   r   Fr#   r&   r)   r+   r-   r.   r(   E   s   
zConv3DNoTemporal.__init__c                 C   s
   d| | fS Nr   r-   r1   r-   r-   r.   r2   R   r3   z&Conv3DNoTemporal.get_downsample_strider4   r5   r-   r-   r+   r.   rI   D   r=   rI   c                       sb   e Zd ZdZ		ddedededejf dedeej d	df fd
dZ	de
d	e
fddZ  ZS )
BasicBlockr   Ninplanesplanesconv_builder.r    
downsampler"   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr	   TrA   )r'   r(   rD   
SequentialrF   rG   conv1conv2relurO   r    r*   rL   rM   rN   r    rO   r   r+   r-   r.   r(   [   s   (

zBasicBlock.__init__xc                 C   sB   |}|  |}| |}| jd ur| |}||7 }| |}|S r0   )rQ   rR   rO   rS   r*   rU   Zresidualoutr-   r-   r.   forwardn   s   




zBasicBlock.forwardr   Nr6   r7   r8   	expansionr9   r   rD   Moduler   r(   r   rX   r<   r-   r-   r+   r.   rK   W   s$    rK   c                       sb   e Zd ZdZ		ddedededejf ded	eej d
df fddZ	de
d
e
fddZ  ZS )
Bottleneck   r   NrL   rM   rN   .r    rO   r"   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr	   r   F)r$   r%   TrA   )r'   r(   rD   rP   rE   rF   rG   rQ   rR   r[   conv3rS   rO   r    rT   r+   r-   r.   r(      s   
	("
zBottleneck.__init__rU   c                 C   sL   |}|  |}| |}| |}| jd ur| |}||7 }| |}|S r0   )rQ   rR   r_   rO   rS   rV   r-   r-   r.   rX      s   





zBottleneck.forwardrY   rZ   r-   r-   r+   r.   r]   |   s$    r]   c                       "   e Zd ZdZd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemr"   Nc              
      s4   t  tjdddddddtdtjdd	 d S )
Nr	   @   )r	      rc   r   r   r   r?   Fr@   TrA   rC   r*   r+   r-   r.   r(      s
   
zBasicStem.__init__r"   Nr6   r7   r8   __doc__r(   r<   r-   r-   r+   r.   ra          ra   c                       r`   )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionr"   Nc                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr	   -   )r   rc   rc   rd   )r   r	   r	   Fr@   TrA   rb   rB   r   r   r   )r   r   r   rC   re   r+   r-   r.   r(      s   

zR2Plus1dStem.__init__rf   rg   r-   r-   r+   r.   rj      ri   rj   c                       s   e Zd Z		ddeeeef  deeeee	e
f   dee dedejf ded	ed
df fddZded
efddZ	ddeeeef  deeee	e
f  dededed
ejfddZ  ZS )r     Fblockconv_makerslayersstem.num_classeszero_init_residualr"   Nc                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]N}t|t
jrt
jj|jddd |jdurt
j|jd q`t|t
jrt
j|jd t
j|jd q`t|t
jrt
j|jdd t
j|jd q`|r|  D ]}t|trt
j|jjd qdS dS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        rb   r   r   r1      r      r	   i   rl   Zfan_outrS   )modeZnonlinearityNg{Gz?)r'   r(   r   rL   rq   _make_layerlayer1layer2layer3layer4rD   ZAdaptiveAvgPool3davgpoolZLinearr[   fcmodules
isinstancerE   initZkaiming_normal_weightr%   Z	constant_rF   Znormal_r]   Zbn3)r*   rn   ro   rp   rq   rr   rs   mr+   r-   r.   r(      s<   


zVideoResNet.__init__rU   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rJ   )rq   rx   ry   rz   r{   r|   flattenr}   )r*   rU   r-   r-   r.   rX      s   







zVideoResNet.forwardr   rN   rM   blocksr    c           
   	   C   s   d }|dks| j ||j kr+||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qDtj| S )Nr   F)r$   r    r%   )	rL   r[   r2   rD   rP   rE   rF   appendrange)
r*   rn   rN   rM   r   r    rO   Z	ds_striderp   ir-   r-   r.   rw   
  s   

zVideoResNet._make_layer)rm   F)r   )r6   r7   r8   typer   rK   r]   r   r   rI   r>   listr9   r   rD   r\   boolr(   r   rX   rP   rw   r<   r-   r-   r+   r.   r      sB    4r   rn   ro   rp   rq   .weightsprogresskwargsr"   c                 K   sT   |d urt |dt|jd  t| |||fi |}|d ur(||j|dd |S )Nrr   
categoriesT)r   Z
check_hash)r   lenmetar   Zload_state_dictZget_state_dict)rn   ro   rp   rq   r   r   r   modelr-   r-   r.   _video_resnet$  s   	r   rH   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzThe weights reproduce closely the accuracy of the paper. The accuracies are estimated on video-level with parameters `frame_rate=15`, `clips_per_video=5`, and `clip_len=16`.)Zmin_sizer   ZrecipeZ_docsc                	   @   D   e Zd Zedeedddi eddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rt      Z	crop_sizeZresize_sizeiP5Kinetics-400gO@g-T@zacc@1zacc@5gK7YD@g"_@Z
num_paramsZ_metricsZ_ops
_file_sizeurlZ
transformsr   N	r6   r7   r8   r   r   r
   _COMMON_METAKINETICS400_V1DEFAULTr-   r-   r-   r.   r   C  $    r   c                	   @   r   )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g{GO@gQU@r   gClE@gtVF@r   r   Nr   r-   r-   r-   r.   r   W  r   r   c                	   @   r   )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   gʡP@g33333U@r   gOnBD@g1Z^@r   r   Nr   r-   r-   r-   r.   r   k  r   r   Z
pretrained)r   T)r   r   c                 K   .   t | } tttgd g dt| |fi |S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    r^   r   r   r   r   )r   verifyr   rK   r   ra   r   r   r   r-   r-   r.   r        
r   c                 K   s4   t | } tttgtgd  g dt| |fi |S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r	   r   )r   r   r   rK   r   rI   ra   r   r-   r-   r.   r     s   
r   c                 K   r   )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    r^   r   )r   r   r   rK   r>   rj   r   r-   r-   r.   r     r   r   )
_ModelURLs)r   r   r   )6collections.abcr   	functoolsr   typingr   r   r   r   Ztorch.nnrD   Ztorchr   Ztransforms._presetsr
   utilsr   Z_apir   r   r   _metar   Z_utilsr   r   __all__rE   r   rP   r>   rI   r\   rK   r]   ra   rj   r   r   r   r9   r   r   r   r   r   r   r   r   r   r   r   r   Z
model_urlsr-   r-   r-   r.   <module>   sx    %1^
*#*#*$