
    3j=                     V   S r SSKrSSKJr  SSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJrJrJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJrJr  SSKJr  SSKJr  \R>                  " \ 5      r! " S S\RD                  5      r# " S S\RH                  5      r% " S S\RH                  5      r& " S S\RH                  5      r' " S S\RH                  5      r(\ " S S\5      5       r) " S S\)5      r*\ " S S\)5      5       r+\" S S!9 " S" S#\)5      5       r,\" S$S!9 " S% S&\	\)5      5       r-/ S'Qr.g)(zPyTorch ConvNext model.    N)nn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )ConvNextConfigc                   v   ^  \ rS rSrSrSSS.U 4S jjrS\R                  S\R                  4U 4S	 jjrS
r	U =r
$ )ConvNextLayerNorm'   a5  LayerNorm that supports two data formats: channels_last (default) or channels_first.
The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
ư>channels_lastepsdata_formatc                `   > [         TU ]  " U4SU0UD6  US;  a  [        SU 35      eX0l        g )Nr   )r   channels_firstzUnsupported data format: )super__init__NotImplementedErrorr   )selfnormalized_shaper   r   kwargs	__class__s        h/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/convnext/modeling_convnext.pyr"   ConvNextLayerNorm.__init__-   s=    )=s=f=AA%(A+&OPP&    featuresreturnc                    > U R                   S:X  a9  UR                  SSSS5      n[        TU ]  U5      nUR                  SSSS5      nU$ [        TU ]  U5      nU$ )zt
Args:
    features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
r    r      r   r   )r   permuter!   forward)r$   r+   r'   s     r(   r0   ConvNextLayerNorm.forward3   sj    
 //''1a3Hwx0H''1a3H  wx0Hr*   r   __name__
__module____qualname____firstlineno____doc__r"   torchTensorr0   __static_attributes____classcell__r'   s   @r(   r   r   '   s9    
 15/ ' '   r*   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jr	Sr
U =r$ )ConvNextEmbeddingsA   zThis class is comparable to (and inspired by) the SwinEmbeddings class
found in src/transformers/models/swin/modeling_swin.py.
c                   > [         TU ]  5         [        R                  " UR                  UR
                  S   UR                  UR                  S9U l        [        UR
                  S   SSS9U l	        UR                  U l        g )Nr   kernel_sizestrider   r    r   )
r!   r"   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsr   	layernormr$   configr'   s     r(   r"   ConvNextEmbeddings.__init__F   sr     "		!4!4Q!7VEVEV_e_p_p!
 +6+>+>q+AtYij"//r*   pixel_valuesr,   c                     UR                   S   nX R                  :w  a  [        S5      eU R                  U5      nU R	                  U5      nU$ )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaperF   
ValueErrorrI   rJ   )r$   rN   rF   
embeddingss       r(   r0   ConvNextEmbeddings.forwardN   sT    #))!,,,,w  **<8
^^J/
r*   )rJ   rF   rI   )r4   r5   r6   r7   r8   r"   r9   FloatTensorr:   r0   r;   r<   r=   s   @r(   r?   r?   A   s/    0E$5$5 %,,  r*   r?   c                      ^  \ rS rSrSrSS\SS4U 4S jjjrS\R                  S\R                  4S jr	S\
4S	 jrS
rU =r$ )ConvNextDropPathZ   zStochastic depth (DropPath) per sample, for residual blocks.

Identity when ``drop_prob`` is 0 or outside training. See `Deep Networks with Stochastic Depth
<https://arxiv.org/abs/1603.09382>`_.
	drop_probr,   Nc                 .   > [         TU ]  5         Xl        g N)r!   r"   rX   )r$   rX   r'   s     r(   r"   ConvNextDropPath.__init__a   s    "r*   hidden_statesc                 V   U R                   S:X  d  U R                  (       d  U$ SU R                   -
  nUR                  S   4SUR                  S-
  -  -   n[        R
                  " X1R                  UR                  S9n[        R                  " XB-   5      nUR                  U5      U-  $ )N        r   r   )r   )dtypedevice)
rX   trainingrP   ndimr9   randr_   r`   floordiv)r$   r\   	keep_probrP   random_tensors        r(   r0   ConvNextDropPath.forwarde   s    >>S   &	$$Q')DM4F4F4J,KK

50C0CML`L`aM$=>  +m;;r*   c                      SU R                    3$ )Nzp=rX   )r$   s    r(   
extra_reprConvNextDropPath.extra_reprn   s    DNN#$$r*   rj   )r^   )r4   r5   r6   r7   r8   floatr"   r9   r:   r0   strrk   r;   r<   r=   s   @r(   rV   rV   Z   sL    #% #$ # #<U\\ <ell <%C % %r*   rV   c                   j   ^  \ rS rSrSrSU 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	ConvNextLayerr   a  This corresponds to the `Block` class in the original implementation.

There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

The authors used (2) as they find it slightly faster in PyTorch.

Args:
    config ([`ConvNextConfig`]): Model configuration class.
    dim (`int`): Number of input channels.
    drop_path (`float`): Stochastic depth rate. Default: 0.0.
c                    > [         TU ]  5         [        R                  " X"SSUS9U l        [        USS9U l        [        R                  " USU-  5      U l        [        UR                     U l        [        R                  " SU-  U5      U l        UR                  S:  a6  [        R                  " UR                  [        R                   " U5      -  SS	9OS U l        US
:  a  [%        U5      U l        g [        R&                  " 5       U l        g )N   r   )rC   paddinggroupsr   r      r   T)requires_gradr^   )r!   r"   r   rE   dwconvr   rJ   Linearpwconv1r   
hidden_actactpwconv2layer_scale_init_value	Parameterr9   oneslayer_scale_parameterrV   Identity	drop_path)r$   rL   dimr   r'   s       r(   r"   ConvNextLayer.__init__   s    iia3O*3D9yya#g.&++,yyS#. ,,q0 LL66CHX\] 	"
 9BC))4R[[]r*   r+   r,   c                 b   UnU R                  U5      nUR                  SSSS5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  U5      nU R                  b  U R                  U-  nUR                  SSSS5      nX R                  U5      -   nU$ )Nr   r.   r   r   )ry   r/   rJ   r{   r}   r~   r   r   )r$   r+   residuals      r(   r0   ConvNextLayer.forward   s    ;;x(##Aq!Q/>>(+<<)88H%<<)%%111H<H##Aq!Q/nnX66r*   )r}   r   ry   r   rJ   r{   r~   )r   r3   r=   s   @r(   rp   rp   r   s.    [   r*   rp   c                   j   ^  \ rS rSrSrSU 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	ConvNextStage   a}  ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

Args:
    config ([`ConvNextConfig`]): Model configuration class.
    in_channels (`int`): Number of input channels.
    out_channels (`int`): Number of output channels.
    depth (`int`): Number of residual blocks.
    drop_path_rates(`list[float]`): Stochastic depth rates for each layer.
c                   > [         T	U ]  5         X#:w  d  US:  a:  [        R                  " [	        USSS9[        R
                  " X#XES9/5      U l        O[        R                  " 5       U l        U=(       d    S/U-  n[        R                  " [        U5       Vs/ s H  n[        XXx   S9PM     sn5      U l	        g s  snf )Nr   r   r    r   rB   r^   )r   r   )
r!   r"   r   
ModuleListr   rE   downsampling_layerrangerp   layers)
r$   rL   in_channelsout_channelsrC   rD   depthdrop_path_ratesjr'   s
            r(   r"   ConvNextStage.__init__   s    &&1*&(mm%ktIYZIIk[`'D# ')mmoD#):cUU]mm\abg\hi\hWX]6?QR\hi
is   B>r+   r,   c                 r    U R                    H  nU" U5      nM     U R                   H  nU" U5      nM     U$ rZ   r   r   )r$   r+   layers      r(   r0   ConvNextStage.forward   s7    ,,EXH -[[EXH !r*   r   )r.   r.   r.   Nr3   r=   s   @r(   r   r      s-    
"   r*   r   c                   p   ^  \ rS rSr% \\S'   SrSrSrSS/r	\
R                  " 5       U 4S j5       rS	rU =r$ )
ConvNextPreTrainedModel   rL   convnextrN   )imagerp   r   c                    > [         TU ]  U5        [        U[        5      (       aD  UR                  b6  [
        R                  " UR                  U R                  R                  5        ggg)zInitialize the weightsN)	r!   _init_weights
isinstancerp   r   init	constant_rL   r   )r$   moduler'   s     r(   r   %ConvNextPreTrainedModel._init_weights   sS     	f%fm,,++7v;;T[[=_=_` 8 -r*    )r4   r5   r6   r7   r   __annotations__base_model_prefixmain_input_nameinput_modalities_no_split_modulesr9   no_gradr   r;   r<   r=   s   @r(   r   r      s?    "$O!(/:
]]_a ar*   r   c                      ^  \ rS rSrSrS\0rU 4S jr\\	" SS9S\
R                  S\\   S\4S j5       5       rS	rU =r$ )
ConvNextEncoder   r\   c           
      P  > [         TU ]  U5        [        R                  " 5       U l        [
        R                  " SUR                  [        UR                  5      SS9R                  UR                  5       Vs/ s H  nUR                  5       PM     nnUR                  S   n[        UR                  5       HT  nUR                  U   n[        UUUUS:  a  SOSUR                  U   X5   S9nU R                  R!                  U5        UnMV     U R#                  5         g s  snf )Nr   cpu)r`   r.   r   )r   r   rD   r   r   )r!   r"   r   r   stagesr9   linspacedrop_path_ratesumdepthssplittolistrG   r   
num_stagesr   append	post_init)	r$   rL   xr   prev_chsiout_chsstager'   s	           r(   r"   ConvNextEncoder.__init__   s    mmo ^^Av'<'<c&-->PY^_eeflfsfst
t HHJt 	 
 &&q)v(()A))!,G!$$EqqmmA& / 2E KKu%H * 	%
s   :D#F)tie_last_hidden_statesr&   r,   c                 J    U R                    H  nU" U5      nM     [        US9$ )N)last_hidden_state)r   r
   )r$   r\   r&   layer_modules       r(   r0   ConvNextEncoder.forward   s)     !KKL(7M ( .NNr*   )r   )r4   r5   r6   r7   r   r   _can_record_outputsr"   r   r   r9   r:   r   r   r
   r0   r;   r<   r=   s   @r(   r   r      sd    %O*M:.  E2O||O +,O 
(	O 3  Or*   r   c            	       x   ^  \ rS rSrU 4S jr\\ S	S\R                  S-  S\	\
   S\4S jj5       5       rSrU =r$ )
ConvNextModel   c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        R                  " UR                  S   UR                  S9U l        U R                  5         g )Nrv   )r!   r"   rL   r?   rR   r   encoderr   	LayerNormrG   layer_norm_epsrJ   r   rK   s     r(   r"   ConvNextModel.__init__   s^     ,V4&v. f&9&9"&=6CXCXY 	r*   NrN   r&   r,   c                     Uc  [        S5      eU R                  U5      nU R                  " U40 UD6nUR                  nU R	                  UR                  SS/5      5      n[        UUUR                  S9$ )Nz You have to specify pixel_valuesr   )r   pooler_outputr\   )rQ   rR   r   r   rJ   meanr   r\   )r$   rN   r&   embedding_outputencoder_outputsr   pooled_outputs          r(   r0   ConvNextModel.forward	  s    
 ?@@??<8:>,,GW:b[a:b+== '8'='=r2h'GH7/')77
 	
r*   )rL   rR   r   rJ   rZ   )r4   r5   r6   r7   r"   r   r   r9   rT   r   r   r   r0   r;   r<   r=   s   @r(   r   r      sP     7;
!--4
GMN`Ga
	1
  
r*   r   z
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc            	          ^  \ rS rSrSrU 4S jr\\ S
S\R                  S-  S\R                  S-  S\4S jj5       5       rS	rU =r$ )ConvNextForImageClassificationi  Fc                 B  > [         TU ]  U5        UR                  U l        [        U5      U l        UR                  S:  a4  [
        R                  " UR                  S   UR                  5      U l        O[
        R                  " 5       U l        U R                  5         g )Nr   r   )r!   r"   
num_labelsr   r   r   rz   rG   
classifierr   r   rK   s     r(   r"   'ConvNextForImageClassification.__init__(  su      ++%f- q  ii(;(;B(?ARARSDO kkmDO 	r*   NrN   labelsr,   c                     U R                   " U40 UD6nUR                  nU R                  U5      nSnUb  U R                  X&U R                  S9n[        UUUR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
N)r   pooled_logitsrL   )losslogitsr\   )r   r   r   loss_functionrL   r   r\   )r$   rN   r   r&   outputsr   r   r   s           r(   r0   &ConvNextForImageClassification.forward7  su     =AMM,<aZ`<a--/%%VRVR]R]%^D3!//
 	
r*   )r   r   r   )NN)r4   r5   r6   r7   accepts_loss_kwargsr"   r   r   r9   rT   
LongTensorr   r0   r;   r<   r=   s   @r(   r   r     s^       _c
!--4
EJEUEUX\E\
	-
  
r*   r   zQ
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    c            	       z   ^  \ rS rSrSrU 4S jr\\\S\	R                  S\\   S\4S j5       5       5       rSrU =r$ )	ConvNextBackboneiQ  Fc                 l  > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  S   /UR                  -   U l        0 n[        U R                  U R                  5       H  u  p4[        USS9X#'   M     [        R                  " U5      U l        U R                  5         g )Nr   r    r2   )r!   r"   r?   rR   r   r   rG   num_featureszipout_featureschannelsr   r   
ModuleDicthidden_states_normsr   )r$   rL   r   r   rF   r'   s        r(   r"   ConvNextBackbone.__init__Y  s     ,V4&v.#0034v7J7JJ !#&t'8'8$--#HE):<Ue)f& $I#%==1D#E  	r*   rN   r&   r,   c                 8   U R                  U5      nU R                  " U40 UD6nUR                  n/ n[        U R                  U5       H<  u  pxXpR
                  ;   d  M  U R                  U   " U5      nUR                  U5        M>     [        [        U5      US9$ )aA  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
>>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

>>> inputs = processor(image, return_tensors="pt")
>>> outputs = model(**inputs)
```)feature_mapsr\   )
rR   r   r\   r   stage_namesr   r   r   r	   tuple)	r$   rN   r&   r   r   r\   r   r   hidden_states	            r(   r0   ConvNextBackbone.forwardi  s    8  ??<8:>,,GW:b[a:b'55#&t'7'7#GE)))#77>|L##L1 $H
 5+>m\\r*   )rR   r   r   r   )r4   r5   r6   r7   has_attentionsr"   r   r   r   r9   r:   r   r   r	   r0   r;   r<   r=   s   @r(   r   r   Q  s^     N   #]ll#] +,#] 
	#]  ! #]r*   r   )r   r   r   r   )/r8   r9   r    r   r   activationsr   backbone_utilsr   r   modeling_outputsr	   r
   r   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_convnextr   
get_loggerr4   loggerr   r   Moduler?   rV   rp   r   r   r   r   r   r   __all__r   r*   r(   <module>r     sD      & ! H  . & @ @ I 5 2 
		H	% 4 2%ryy %0(BII (V!BII !H ao a a %O- %OP !
+ !
 !
H )
%< )
)
X 
9]}&= 9]
9]x mr*   