
    3j4>                         S SK r S SKrS SKJrJr  S SKJrJr  S SKJ	r	  SSK
Jr  SSKJr  / SQr " S	 S
\5      r " S S\5      r\\\   -  \-  r " S S\5      r " S S\5      r " S S\5      rg)    N)SizeTensor)
functionalinit)	Parameter   )CrossMapLRN2d)Module)LocalResponseNormr	   	LayerNorm	GroupNormRMSNormc                      ^  \ rS rSr% Sr/ SQr\\S'   \\S'   \\S'   \\S'    SS\S\S\S\SS	4
U 4S
 jjjr	S\
S\
4S jrS rSrU =r$ )r      a;  Applies local response normalization over an input signal.

The input signal is composed of several input planes, where channels occupy the second dimension.
Applies normalization across channels.

.. math::
    b_{c} = a_{c}\left(k + \frac{\alpha}{n}
    \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

Args:
    size: amount of neighbouring channels used for normalization
    alpha: multiplicative factor. Default: 0.0001
    beta: exponent. Default: 0.75
    k: additive factor. Default: 1

Shape:
    - Input: :math:`(N, C, *)`
    - Output: :math:`(N, C, *)` (same shape as input)

Examples::

    >>> lrn = nn.LocalResponseNorm(2)
    >>> signal_2d = torch.randn(32, 5, 24, 24)
    >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
    >>> output_2d = lrn(signal_2d)
    >>> output_4d = lrn(signal_4d)

)sizealphabetakr   r   r   r   returnNc                 R   > [         TU ]  5         Xl        X l        X0l        X@l        g Nsuper__init__r   r   r   r   selfr   r   r   r   	__class__s        X/home/wildlama/miniconda3/lib/python3.13/site-packages/torch/nn/modules/normalization.pyr   LocalResponseNorm.__init__4   $     		
	    inputc                     [         R                  " XR                  U R                  U R                  U R
                  5      $ z
Runs the forward pass.
)Flocal_response_normr   r   r   r   r   r"   s     r   forwardLocalResponseNorm.forward=   s+     $$UIItzz499dffUUr!   c                 :    SR                   " S0 U R                  D6$ 0
Return the extra representation of the module.
z){size}, alpha={alpha}, beta={beta}, k={k} format__dict__r   s    r   
extra_reprLocalResponseNorm.extra_reprC        ;AARDMMRRr!   r   r   r   r   )-C6?      ?g      ?)__name__
__module____qualname____firstlineno____doc____constants__int__annotations__floatr   r   r(   r2   __static_attributes____classcell__r   s   @r   r   r      s~    : 3M
IL
KH NQ %49EJ	 VV V VS Sr!   r   c                      ^  \ rS rSr% \\S'   \\S'   \\S'   \\S'    SS\S\S\S\SS4
U 4S jjjrS	\S\4S
 jr	S\
4S jrSrU =r$ )r	   J   r   r   r   r   r   Nc                 R   > [         TU ]  5         Xl        X l        X0l        X@l        g r   r   r   s        r   r   CrossMapLRN2d.__init__P   r    r!   r"   c                     [         R                  " XR                  U R                  U R                  U R
                  5      $ r$   )_cross_map_lrn2dapplyr   r   r   r   r'   s     r   r(   CrossMapLRN2d.forwardY   s+      %%eYY

DIItvvVVr!   c                 :    SR                   " S0 U R                  D6$ r+   r.   r1   s    r   r2   CrossMapLRN2d.extra_repr_   r4   r!   r5   )r6   r7   r   )r8   r9   r:   r;   r>   r?   r@   r   r   r(   strr2   rA   rB   rC   s   @r   r	   r	   J   sz    
IL
KH NO %49EJ	 WV W WSC S Sr!   r	   c                      ^  \ rS rSr% Sr/ SQr\\S4   \S'   \	\S'   \
\S'        SS\S\	S\
S	\
S
S4
U 4S jjjrSS jrS\S
\4S jrS
\4S jrSrU =r$ )r   i   a  Applies Layer Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

.. math::
    y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
:math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
:attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
The variance is calculated via the biased estimator, equivalent to
`torch.var(input, correction=0)`.

.. note::
    Unlike Batch Normalization and Instance Normalization, which applies
    scalar scale and bias for each entire channel/plane with the
    :attr:`affine` option, Layer Normalization applies per-element scale and
    bias with :attr:`elementwise_affine`.

This layer uses statistics computed from input data in both training and
evaluation modes.

Args:
    normalized_shape (int or list or torch.Size): input shape from an expected input
        of size

        .. math::
            [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                \times \ldots \times \text{normalized\_shape}[-1]]

        If a single integer is used, it is treated as a singleton list, and this module will
        normalize over the last dimension which is expected to be of that specific size.
    eps: a value added to the denominator for numerical stability. Default: 1e-5
    elementwise_affine: a boolean value that when set to ``True``, this module
        has learnable per-element affine parameters initialized to ones (for weights)
        and zeros (for biases). Default: ``True``
    bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
        :attr:`elementwise_affine` is ``True``). Default: ``True``

Attributes:
    weight: the learnable weights of the module of shape
        :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
        The values are initialized to 1.
    bias:   the learnable bias of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 0.

Shape:
    - Input: :math:`(N, *)`
    - Output: :math:`(N, *)` (same shape as input)

Examples::

    >>> # NLP Example
    >>> batch, sentence_length, embedding_dim = 20, 5, 10
    >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
    >>> layer_norm = nn.LayerNorm(embedding_dim)
    >>> # Activate module
    >>> layer_norm(embedding)
    >>>
    >>> # Image Example
    >>> N, C, H, W = 20, 5, 10, 10
    >>> input = torch.randn(N, C, H, W)
    >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
    >>> # as shown in the image below
    >>> layer_norm = nn.LayerNorm([C, H, W])
    >>> output = layer_norm(input)

.. image:: ../_static/img/nn/layer_norm.jpg
    :scale: 50 %

normalized_shapeepselementwise_affine.rR   rS   rT   Nbiasr   c                 "  > XVS.n[         TU ]  5         [        U[        R                  5      (       a  U4n[        U5      U l        X l        X0l        U R                  (       ay  [        [        R                  " U R                  40 UD65      U l        U(       a0  [        [        R                  " U R                  40 UD65      U l        O7U R                  SS 5        O$U R                  SS 5        U R                  SS 5        U R                  5         g )NdevicedtyperU   weight)r   r   
isinstancenumbersIntegraltuplerR   rS   rT   r   torchemptyrZ   rU   register_parameterreset_parameters)	r   rR   rS   rT   rU   rX   rY   factory_kwargsr   s	           r   r   LayerNorm.__init__   s     %+;&(8(899 02 %&6 7"4""#D11D^DDK %KK 5 5HH	 ''5##Hd3##FD1r!   c                     U R                   (       aO  [        R                  " U R                  5        U R                  b!  [        R
                  " U R                  5        g g g r   )rT   r   ones_rZ   rU   zeros_r1   s    r   rb   LayerNorm.reset_parameters   s?    ""JJt{{#yy$DII& % #r!   r"   c                     [         R                  " XR                  U R                  U R                  U R
                  5      $ r   )r%   
layer_normrR   rZ   rU   rS   r'   s     r   r(   LayerNorm.forward   s.    ||(($++tyy$((
 	
r!   c                 Z    SR                   " S0 U R                  DSU R                  S L0D6$ )NzW{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}, bias={use_bias}use_biasr-   r/   r0   rU   r1   s    r   r2   LayerNorm.extra_repr   <    $f% V'+}}V?CyyPT?TV	
r!   )rU   rT   rS   rR   rZ   )h㈵>TTNNr   N)r8   r9   r:   r;   r<   r=   r^   r>   r?   r@   bool_shape_tr   rb   r   r(   rN   r2   rA   rB   rC   s   @r   r   r   i   s    KZ FMCHo%	J
 #' "    !	 
   
   B'
V 
 


C 
 
r!   r   c                      ^  \ rS rSr% Sr/ SQr\\S'   \\S'   \\S'   \	\S'       SSS
.S\S\S\S\	S\	SS	4U 4S jjjjr
SS jrS\S\4S jrS\4S jrSrU =r$ )r      a  Applies Group Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

.. math::
    y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

The input channels are separated into :attr:`num_groups` groups, each containing
``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
:attr:`num_groups`. The mean and standard-deviation are calculated
separately over each group. :math:`\gamma` and :math:`\beta` are learnable
per-channel affine transform parameter vectors of size :attr:`num_channels` if
:attr:`affine` is ``True``.
The variance is calculated via the biased estimator, equivalent to
`torch.var(input, correction=0)`.

This layer uses statistics computed from input data in both training and
evaluation modes.

Args:
    num_groups (int): number of groups to separate the channels into
    num_channels (int): number of channels expected in input
    eps: a value added to the denominator for numerical stability. Default: 1e-5
    affine: a boolean value that when set to ``True``, this module
        has learnable per-channel affine parameters initialized to ones (for weights)
        and zeros (for biases). Default: ``True``
    bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
        :attr:`affine` is ``True``). Default: ``True``

Shape:
    - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
    - Output: :math:`(N, C, *)` (same shape as input)

Examples::

    >>> input = torch.randn(20, 6, 10, 10)
    >>> # Separate 6 channels into 3 groups
    >>> m = nn.GroupNorm(3, 6)
    >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
    >>> m = nn.GroupNorm(6, 6)
    >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
    >>> m = nn.GroupNorm(1, 6)
    >>> # Activating the module
    >>> output = m(input)
)
num_groupsnum_channelsrS   affinerw   rx   rS   ry   TN)rU   rU   r   c                  > XVS.n[         T	U ]  5         X!-  S:w  a  [        SU SU S35      eXl        X l        X0l        X@l        U R                  (       ae  [        [        R                  " U40 UD65      U l
        U(       a&  [        [        R                  " U40 UD65      U l        O7U R                  SS 5        O$U R                  SS 5        U R                  SS 5        U R                  5         g )NrW   r   znum_channels (z#) must be divisible by num_groups ()rU   rZ   )r   r   
ValueErrorrw   rx   rS   ry   r   r_   r`   rZ   rU   ra   rb   )
r   rw   rx   rS   ry   rX   rY   rU   rc   r   s
            r   r   GroupNorm.__init__%  s     %+;$) .QR\Q]]^_  %(;;#EKK$O$OPDK%ekk,&Q.&QR	''5##Hd3##FD1r!   c                     U R                   (       aO  [        R                  " U R                  5        U R                  b!  [        R
                  " U R                  5        g g g r   )ry   r   rf   rZ   rU   rg   r1   s    r   rb   GroupNorm.reset_parametersG  s=    ;;JJt{{#yy$DII& % r!   r"   c                     [         R                  " XR                  U R                  U R                  U R
                  5      $ r   )r%   
group_normrw   rZ   rU   rS   r'   s     r   r(   GroupNorm.forwardM  s'    ||E??DKKDHHUUr!   c                 Z    SR                   " S0 U R                  DSU R                  S L0D6$ )NzI{num_groups}, {num_channels}, eps={eps}, affine={affine}, bias={use_bias}rm   r-   rn   r1   s    r   r2   GroupNorm.extra_reprP  rp   r!   )ry   rU   rS   rx   rw   rZ   )rq   TNNrr   )r8   r9   r:   r;   r<   r=   r>   r?   r@   rs   r   rb   r   r(   rN   r2   rA   rB   rC   s   @r   r   r      s    -^ DMO	JL            	  
       
     D'VV V V
C 
 
r!   r   c            	          ^  \ rS rSr% Sr/ SQr\\S4   \S'   \	S-  \S'   \
\S'       SS\S\	S-  S\
S	S4U 4S
 jjjrSS jrS\R                  S	\R                  4S jrS	\4S jrSrU =r$ )r   iW  a  Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

This layer implements the operation as described in
the paper `Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>`__

.. math::
    y_i = \frac{x_i}{\mathrm{RMS}(x)} * \gamma_i, \quad
    \text{where} \quad \text{RMS}(x) = \sqrt{\epsilon + \frac{1}{n} \sum_{i=1}^{n} x_i^2}

The RMS is taken over the last ``D`` dimensions, where ``D``
is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
is ``(3, 5)`` (a 2-dimensional shape), the RMS is computed over
the last 2 dimensions of the input.

Args:
    normalized_shape (int or list or torch.Size): input shape from an expected input
        of size

        .. math::
            [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                \times \ldots \times \text{normalized\_shape}[-1]]

        If a single integer is used, it is treated as a singleton list, and this module will
        normalize over the last dimension which is expected to be of that specific size.
    eps (float, optional): a value added to the denominator for numerical stability.
        If not specified, uses the machine epsilon of the computation (opmath) type:
        fp16/bf16 and fp32 inputs use ``torch.finfo(torch.float32).eps``, while fp64
        inputs use ``torch.finfo(torch.float64).eps``. Default: ``None``
    elementwise_affine: a boolean value that when set to ``True``, this module
        has learnable per-element affine parameters initialized to ones (for weights). Default: ``True``.

Shape:
    - Input: :math:`(N, *)`
    - Output: :math:`(N, *)` (same shape as input)

Examples::

    >>> rms_norm = nn.RMSNorm([2, 3])
    >>> input = torch.randn(2, 2, 3)
    >>> rms_norm(input)

rQ   .rR   NrS   rT   r   c                 l  > XES.n[         TU ]  5         [        U[        R                  5      (       a  U4n[        U5      U l        X l        X0l        U R                  (       a0  [        [        R                  " U R                  40 UD65      U l        OU R                  SS 5        U R                  5         g )NrW   rZ   )r   r   r[   r\   r]   r^   rR   rS   rT   r   r_   r`   rZ   ra   rb   )r   rR   rS   rT   rX   rY   rc   r   s          r   r   RMSNorm.__init__  s     %+;&(8(899 02 %&6 7"4""#D11D^DDK ##Hd3r!   c                 h    U R                   (       a!  [        R                  " U R                  5        gg)zC
Resets parameters based on their initialization used in __init__.
N)rT   r   rf   rZ   r1   s    r   rb   RMSNorm.reset_parameters  s"     ""JJt{{# #r!   xc                 n    [         R                  " XR                  U R                  U R                  5      $ r$   )r%   rms_normrR   rZ   rS   )r   r   s     r   r(   RMSNorm.forward  s%     zz!22DKKJJr!   c                 :    SR                   " S0 U R                  D6$ )r,   zF{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}r-   r.   r1   s    r   r2   RMSNorm.extra_repr  s)    
66<f= N?C}}N	
r!   )rT   rS   rR   rZ   )NTNNrr   )r8   r9   r:   r;   r<   r=   r^   r>   r?   r@   rs   rt   r   rb   r_   r   r(   rN   r2   rA   rB   rC   s   @r   r   r   W  s    )V FMCHo%	
 !#' "  T\  !	  
   0$K K%,, K
C 
 
r!   r   )r\   r_   r   r   torch.nnr   r%   r   torch.nn.parameterr   
_functionsr	   rI   moduler
   __all__r   r>   listrt   r   r   r   r-   r!   r   <module>r      s       * ( 9  V7S 7StSF S8 c?T!C
 C
Le
 e
P]
f ]
r!   