
    3jY              #          S SK Jr  S SKrS SKJr  SSKJrJrJrJrJ	r	J
r
JrJrJrJrJrJrJrJr  SS/r " S S\5      rS	S
\ S\ S\ S\ S3	-   \l               S$S\\   S\\   S\\   S\\   S\S-  S\S-  S\S-  S\S\S-  S\S\S\S\S\S\S\SS4"S jjrS  rS\\   S\\   S\\   S\\   S\S-  S\S-  S\S\S\S\S\S\S\S\SS4S! jrS\\   S\\   S\\   S\\   S\S-  S\S-  S\S\S\S\S\S\S\S\SS4S" jrS\\   S\\   S\\   S\\   S\S-  S\S-  S\\-  S\S\S\S\S\S\S\SS4S# jrg)%    )castN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype
_get_value_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real
DeviceDict	OptimizerParamsTAdagradadagradc                      ^  \ rS rSr      SSSSS.S\S\\-  S\S\S	\S
\S\S-  S\S\S\S-  SS4U 4S jjjjrU 4S jr	SS jr
S r\SS j5       rSrU =r$ )r      NF)maximizedifferentiablefusedparamslrlr_decayweight_decayinitial_accumulator_valueepsforeachr   r   r   returnc          
      ~  > [        U[        5      (       a  UR                  5       S:w  a  [        S5      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eSU::  d  [        SU 35      eUUUUUUUU	U
S	.	n[        TU ]  X5        U
(       a2  U	(       a  [        S
5      eU(       a  [        S5      eSU l        SU l        U R                   H  nUS    H  nU R                  U   nUS   (       a*  [        R                  " S[        US   S9UR                  S9O[        R                  " S[        5       S9US'   [        R                   " U5      (       a  [#        XU5      OUn[        R$                  " X[        R&                  S9US'   M     M     g )Nr   zTensor lr must be 1-element        zInvalid learning rate: zInvalid lr_decay value: zInvalid weight_decay value: z)Invalid initial_accumulator_value value: zInvalid epsilon value: )	r   r   r    r   r   r!   r   r   r   z)`fused` does not support `differentiable`z0`fused` and `foreach` cannot be `True` together.Tr   r    is_fuseddtypedevicer)   stepmemory_formatsum)
isinstancer   numel
ValueErrorsuper__init__RuntimeError"_need_device_dtype_check_for_fused_step_supports_amp_scalingparam_groupsstatetorchzerosr
   r*   tensor
is_complexcomplex	full_likepreserve_format)selfr   r   r   r   r   r    r!   r   r   r   defaultsgrouppr9   
init_value	__class__s                   M/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/optim/adagrad.pyr4   Adagrad.__init__   s    b&!!bhhjAo:;;by6rd;<<h7zBCCl";L>JKK//;<U;VW  cz6se<==  ()B ,

 	*"#NOO"#UVV6:D3.2D+&&E8_

1 W~ KK/wH xx c1B1DE f ''** 5Q2 
  %1F1F e! % '    c           	      r  > [         T
U ]  U5        S nU R                   GH  nUR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5      nUS    H  nU R                  R                  U/ 5      n[        U5      S:w  d  M0  [        R                  " US   5      (       a  MP  [        US   5      nUS   (       a'  [        R                  " U[        US	9UR                  S
9O[        R                  " U[        5       S9US'   M     GM     [        U R                  R                  5       5      n[        U5      S:g  =(       a    [        R                  " US   S   5      nU(       d5  U H.  n	[        R                  " [        U	S   5      [        US	9S9U	S'   M0     g g )Nr!   r   Fr   r   r   r   r,   r&   r(   r+   )r3   __setstate__r8   
setdefaultr9   getlenr:   	is_tensorfloatr<   r
   r*   listvalues)rA   r9   r   rC   rD   p_statestep_valstate_valuesstep_is_tensorsrF   s             rG   rK   Adagrad.__setstate__d   s|   U# &&EY-Z/-u5$$Wd3E8_**..B/w<1$U__WV_-M-M$WV_5H !> $"3U"C#$88 #\\(:K:MN FO	 % '( DJJ--/0l+q0 
eooOF#7
 !!LL!F)$,=u,M&	 " rI   c                     U R                    H1  nUS    H%  nU R                  U   nUS   R                  5         M'     M3     g)z6Calls tensor.share_memory_() on the state sum tensors.r   r/   N)r8   r9   share_memory_)rA   rC   rD   r9   s       rG   share_memoryAdagrad.share_memory   s=    &&E8_

1e**, % 'rI   c                 x   Su  pgUS    GH  nUR                   c  M  US   (       a$  [        U SS5      (       a  [        U5        SU l        XhR                   R                  -  nU[
        R                  " U5      -  nUR                  U5        UR                  UR                   5        U R                  U   n	[        U	5      S:X  a  US   (       a  [        U5        US   (       a*  [
        R                  " S[        US   S	9UR                  S
9O[
        R                  " S[        5       S9U	S'   U R                  S   n
[
        R                  " U5      (       a  [        X5      OU
n[
        R                   " X[
        R"                  S9U	S'   UR                  U	S   5        UR                  U	S   5        GM     Xg4$ )N)FFr   r   r6   TFr   r%   r&   r(   r$   r+   r,   r   r-   r/   )gradgetattrr   r6   	is_sparser:   r=   appendr9   rN   r;   r
   r*   r<   rB   r>   r?   r@   )rA   rC   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexrD   r9   r   rE   s               rG   _init_groupAdagrad._init_group   s   '3$xAvv!>g8' '
 2!4>CD;66#3#33u//22 ''*QVV$

1u:?W~5a8 !> "3U7^"L#$88 #\\#5F5HI &M 1531-
 !++A..   9U6 
 $)??U5J5J$E%L !!%,/""5=1Q !T ++rI   c                 h   SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   Hf  n/ n/ n/ n/ nU R                  X4XVU5      u  p[	        UUUUUS   US   US   US   UUS   US   US   U	US	   [        U S
S5      [        U SS5      S9  Mh     U$ ! , (       d  f       N= f)zPerform a single optimization step.

Args:
    closure (Callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r   r    r!   r   r   r   
grad_scale	found_inf)r   r   r   r    rf   r!   r   r   rg   r   rk   rl   )r:   enable_gradr8   rh   r   r_   )
rA   closurelossrC   rb   rc   rd   re   rf   rg   s
             rG   r,   Adagrad.step   s     ""$y % &&E-/"$E')J(*K+/+;+;K,(O  ;">2z*%L /i(z*$%56'Gn"4t<!$T:! ': A %$s   B##
B1)r6   r7   )g{Gz?r   r   r   g|=N)r"   NN)__name__
__module____qualname____firstlineno__r   rP   r   boolr4   rK   r[   rh   r   r,   __static_attributes____classcell__)rF   s   @rG   r   r      s     "+,#E $!EE FNE 	E
 E $)E E E E E d{E 
E EN!F-,,\ "* "*rI   a[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU and CUDA only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    r   rc   rd   re   r   rk   rl   rf   r!   r   rg   r   r   r   r    r   r"   c                L   [        S U 5       5      (       d  [        S5      eUc  Uc  [        X	SS9u  nnUc  SnUc  SnU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a.  [        R                  R                  5       (       a  [        S5      eU(       a*  [        R                  R                  5       (       d  [        nO7U(       a*  [        R                  R                  5       (       d  [        nO[        nU" U UUUUUUUUUU	U
UUS9  g)	zlFunctional API that performs Adagrad algorithm computation.

See :class:`~torch.optim.Adagrad` for details.
c              3   V   #    U  H  n[        U[        R                  5      v   M!     g 7frq   )r0   r:   r   ).0ts     rG   	<genexpr>adagrad.<locals>.<genexpr>6  s     @Kqz!U\\**Ks   ')zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizersz4torch.jit.script not supported with fused optimizers
r   r   r   r    rf   r   r   rg   rk   rl   )	allr5   r   r:   jitis_scripting_fused_adagrad_multi_tensor_adagrad_single_tensor_adagrad)r   rc   rd   re   r   rk   rl   rf   r!   r   rg   r   r   r   r    r   _funcs                     rG   r   r     s   2 @K@@@^
 	
 }1e

7 }599))++STT''))QRRUYY++--	//11$%!'%rI   c                 P    U R                  5       n[        R                  " XU5      $ rq   )sizer:   sparse_coo_tensor)r^   grad_indicesrR   r   s       rG   _make_sparser   g  s     99;D""<>>rI   c          
         Uc  Ub  [        S5      e[        R                  R                  5       (       d  [	        U5      n[        XX#SS9 GH   u  pnnUS-  n[        U5      nU(       d  UOU* nUS:w  a+  UR                  (       a  [        S5      eUR                  XS9nUSUS-
  U-  -   -  nUR                  (       a  UR                  5       nUR                  5       nUR                  5       nUR                  [        UUUR                  S5      5      5        UR!                  U5      nUR                  5       R#                  5       R                  U	5      nUR                  [        UUUU-  5      U* S9  GM/  [        R$                  " U5      nU(       aB  [        R&                  " U5      n[        R&                  " U5      n[        R&                  " U5      nUR)                  XSS	9  U(       a  UR+                  5       U	-   nOUR+                  5       R                  U	5      nUR-                  UUU* S	9  U(       d  GM  [        R.                  " U5      n[        R.                  " U5      nGM#     g )
N,Expected grad_scale and found_inf to be NoneT)strictr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)AssertionErrorr:   r   r   r   zipr   r`   r5   addcoalesce_indices_valuesadd_r   powsparse_masksqrt_r=   view_as_realaddcmul_sqrtaddcdiv_view_as_complex)r   rc   rd   re   rk   rl   r   r   r   r    rf   r   r   rg   paramr^   	state_sumstep_tr,   clrr   grad_valuesstd
std_valuesr=   s                            rG   r   r   l  s   " !6KLL99!!##^*-zt+&Y 	!&!#t$1~~"Q  88E86DAX--.>>==?D==?L,,.KNN<lKOOA<NOP''-C,,.33C8JJJT<z1IJSVRV   ))%0J))$/!..y9	**51t3nn&,nn&++C0NN4SDN1z--e4!11)<	U+rI   c                r   U(       a  [        S5      eUc  Ub  [        S5      e[        U 5      S:X  a  g [        U5      n[        R                  " XX#/5      nUR                  5        GHG  u  u  nnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      nU
=(       a    [        S U 5       5      nU(       a  [        UUUUUUUU	SUUUUUS9  M  U(       a  [        UUU5        U(       a  [        R                  " U5      n[        R                  R                  5       (       d>  US   R                   (       a*  [        R"                  " U[        R$                  " SSS	9SS
9  O[        R"                  " US5        US:w  a4  U(       a  [        R"                  " UUUS
9  O[        R&                  " UUUS
9nU Vs/ s H  nU* S[)        U5      S-
  U-  -   -  PM     nn[        R*                  " UUUSS9  [        R,                  " U5      n[        R"                  " UU	5        US:w  d  U(       a  [        R.                  " UU5        UnO[        R0                  " UU5      n[        R2                  " UUU5        GMJ     g s  snf )Nz#_foreach ops don't support autogradr   r   c              3   8   #    U  H  oR                   v   M     g 7frq   )r`   )r{   r^   s     rG   r}   (_multi_tensor_adagrad.<locals>.<genexpr>  s      9
'3tNN|s   Tr   g      ?cpu)r*   r   r   r   )r   rN   r   r   "_group_tensors_by_device_and_dtyperR   r   rQ   r   anyr   r   r:   _foreach_negcompileris_compilingis_cpu_foreach_add_r<   _foreach_addr   _foreach_addcmul__foreach_sqrt_foreach_mul__foreach_mul_foreach_addcdiv_)r   rc   rd   re   rk   rl   r   r   r   r    rf   r   r   rg   grouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_r   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsdevice_has_sparse_gradr,   	minus_clrr   	numerators                                rG   r   r     s   " BCC!6KLL 6{a	BB#FF	
0  &&(		 	T&\>:DL-8 f/AB!$v,0CD!0 "
S 9
'39
 6
 ""!")! $!-'%#   -7HI --l;L ~~**,,1CA1F1M1M"ELLU$C3  2A61##L-|T$11 -| 
 GY
FXdRC1
4(1,889FX 	 
 	 1<UVW!!"34C%1i8$I**<CIy#>Q )p
s   0"J4c                   U (       d  g U
(       d  U(       a  [        S5      eU(       a  [        S5      eUb  UR                  U0O0 nUb  UR                  U0O0 n[        U[        5      (       a'  [	        UR                  5      S:w  a  UR                  U0OS n[
        R                  " XX#/5      nUR                  5        GH9  u  u  nnu  u  nnnnn[        [        [           U5      n[        [        [           U5      n[        [        [           U5      n[        [        [           U5      nSu  nnUb   UR                  UUR                  USS95      nUb   UR                  UUR                  USS95      nUb  UU;  a  UR                  USS9UU'   UU   n[        R                  " US5        [        R                  " UUUUUUUU	UUUS	9  Uc  GM  [        R                  " UU/[!        U5      -  5        GM<     g )
Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=Truer   )NNT)non_blocking)r*   r   r   )r   r   r   r    r   rk   rl   )r5   r*   r0   r   strr   r   itemsr   rQ   rL   tor:   r   _fused_adagrad__foreach_sub_rN   )r   rc   rd   re   rk   rl   r   r   r   r    rf   r   r   rg   grad_scale_dictfound_inf_dictlr_dictgrouped_tensorsr*   r   r   r   r   r   r   r   r   r   device_grad_scaledevice_found_infs                                 rG   r   r     s$   " +RSSJ
 	

 ,6+A		J'r  *3)>		9%B  &b&11c"))n6MBSW   BB	
0O 
			 	 	
	T&\>:DL-8 f/AB!$v,0CD.8++! / : :
f4@!  -88	V$?  6#8 ee6eEGFOB.2%(&	
 '"%5$6=O9P$PC 
!rI   )NNNFNFF)typingr   r:   r   	optimizerr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__rQ   rv   rP   r   r   r   r   r   r%   rI   rG   <module>r      s        $ i
 Ki K^4		 	 
 		 		 5. p  $# " GLG<G VG f	G
 $;G G }G G D[G G G 	G  !G" #G$ 
%G& 'G( 
)GT?
A=LA=<A= VA= f	A=
 A= }A= 	A= A= A= 
A= A= A= A= A=  
!A=Hl?Ll?<l? Vl? f	l?
 l? }l? 	l? l? l? 
l? l? l? l? l?  
!l?^SLS<S VS f	S
 S }S 	S S S 
S S S S S  
!SrI   