from typing import cast, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")
            self._need_device_dtype_check_for_fused = True

        # Eagerly initialize per-parameter state: the step counter and the
        # running sum of squared gradients.
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=group["fused"]),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            # Older checkpoints stored `step` as a plain number; wrap it in a tensor.
            for s in state_values:
                s["step"] = torch.tensor(
                    float(s["step"]), dtype=_get_scalar_dtype(is_fused=fused)
                )

    def share_memory(self):
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", False
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            state_sums: list[Tensor] = []
            state_steps: list[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad,
                grads,
                state_sums,
                state_steps,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                lr_decay=group["lr_decay"],
                eps=group["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=group["foreach"],
                maximize=group["maximize"],
                differentiable=group["differentiable"],
                has_complex=has_complex,
                fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss


Fa[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    """
)


def adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Pick an implementation: an explicit `fused`/`foreach` flag wins; otherwise
    # fall back to the default heuristic.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )

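
# Illustrative sketch (not part of the original module): driving the functional
# `adagrad` API above directly with explicitly managed state tensors. In normal
# use the `Adagrad` class creates and tracks this state; the tensor shapes and
# hyperparameter values below are arbitrary placeholders.
#
#     p = torch.randn(3, requires_grad=True)
#     p.grad = torch.randn(3)
#     state_sum = torch.zeros(3)
#     step = torch.tensor(0.0)
#     with torch.no_grad():
#         adagrad(
#             [p], [p.grad], [state_sum], [step],
#             lr=1e-2, weight_decay=0.0, lr_decay=0.0, eps=1e-10, maximize=False,
#         )
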

def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert grad_scale is None and found_inf is None
    if not torch.jit.is_scripting():
        lr = _to_scalar(lr)

    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # Update the step count and apply the learning-rate decay schedule.
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            # Sparse path: accumulate squared gradient values only at the
            # occupied indices and scale the update by the masked std.
            grad = grad.coalesce()  # the update is non-linear so indices must be unique
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)
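
# Reference sketch (not part of the original module): for a dense parameter the
# loop above amounts to the following per-step arithmetic, matching the math in
# `Adagrad.__doc__` (names are illustrative):
#
#     clr = lr / (1 + (step - 1) * lr_decay)          # decayed learning rate
#     state_sum += grad * grad                        # running sum of squared gradients
#     param -= clr * grad / (state_sum.sqrt() + eps)
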

def _multi_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"
    assert grad_scale is None and found_inf is None

    if len(params) == 0:
        return

    lr = _to_scalar(lr)

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )
        if device_has_sparse_grad:
            # Sparse gradients are not supported by the foreach kernels; fall back
            # to the single-tensor implementation for this device/dtype bucket.
            _single_tensor_adagrad(
                device_params, device_grads, device_state_sums, device_state_steps,
                lr=lr, weight_decay=weight_decay, lr_decay=lr_decay, eps=eps,
                has_sparse_grad=True, maximize=maximize,
                differentiable=differentiable, has_complex=has_complex,
                grad_scale=grad_scale, found_inf=found_inf,
            )
            continue

        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)  # type: ignore[assignment]

        # Update steps. If the steps are on CPU, foreach falls back to a slow
        # per-tensor loop, so add a wrapped CPU scalar tensor instead.
        if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            if maximize:
                # Re-use the intermediate memory (device_grads) already allocated.
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay) for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Again, re-use the intermediate memory (device_grads) already allocated.
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)  # type: ignore[assignment]

        torch._foreach_addcdiv_(device_params, numerator, std)


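
# Illustrative sketch (not part of the original module): the foreach path above
# replaces the per-parameter Python loop with horizontally fused multi-tensor
# kernels. For example, the squared-gradient accumulation for a whole parameter
# group is a single call (placeholder tensors below):
#
#     sums = [torch.zeros(3), torch.zeros(5)]
#     grads = [torch.randn(3), torch.randn(5)]
#     torch._foreach_addcmul_(sums, grads, grads, value=1.0)  # sums[i] += grads[i] ** 2
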

def _fused_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")
    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    lr = _to_scalar(lr)

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (device_params_, device_grads_, device_state_sums_, device_state_steps_),
        _,
    ) in grouped_tensors.items():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        # Move the grad scaler tensors to this device once and cache them.
        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params,
            device_grads,
            device_state_sums,
            device_state_steps,
            lr=lr,
            lr_decay=lr_decay,
            weight_decay=weight_decay,
            eps=eps,
            maximize=maximize,
            grad_scale=device_grad_scale,
            found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            # A step was applied on inf/NaN gradients; roll the step counters back.
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )
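
# Illustrative usage sketch (not part of the original module). The model, data,
# and hyperparameters are placeholders; `torch.optim.Adagrad` is the public
# entry point that wraps everything defined above.
#
#     model = torch.nn.Linear(10, 1)
#     optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-2, lr_decay=1e-4)
#     for _ in range(3):
#         optimizer.zero_grad()
#         loss = model(torch.randn(4, 10)).pow(2).mean()
#         loss.backward()
#         optimizer.step()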