
    3jK                        S SK JrJr  S SKJr  S SKrS SKJrJr  SSKJ	r
  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJrJrJr  SSKJrJr  SSKJr  SSKJ r   SSK!J"r"J#r#J$r$  SSK%J&r&J'r'  SSK(J)r)  SSK*J+r+  \#" SS9\ " S S\5      5       5       r, " S S\RZ                  5      r. " S S\RZ                  5      r/ " S S\RZ                  5      r0  SOS\RZ                  S\R                  S \R                  S!\R                  S"\R                  S-  S#\1S-  S$\1S%\\"   4S& jjr2 " S' S(\RZ                  5      r3 " S) S*\RZ                  5      r4 " S+ S,\RZ                  5      r5 " S- S.\5      r6\# " S/ S0\5      5       r7\# " S1 S2\75      5       r8 " S3 S4\RZ                  5      r9\#" S5S9 " S6 S7\75      5       r:\#" S8S9 " S9 S:\75      5       r; " S; S<\RZ                  5      r< " S= S>\RZ                  5      r= " S? S@\RZ                  5      r> " SA SB\RZ                  5      r? " SC SD\RZ                  5      r@ " SE SF\RZ                  5      rA " SG SH\RZ                  5      rB\# " SI SJ\75      5       rC\#" SKS9 " SL SM\\75      5       rD/ SNQrEg)P    )CallableIterable)	dataclassN)Tensornn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)create_bidirectional_mask)GradientCheckpointingLayer)BackboneOutputBaseModelOutputWithPoolingImageClassifierOutputMaskedLMOutputSemanticSegmenterOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)#compile_compatible_method_lru_cache)TransformersKwargsauto_docstring	torch_int)can_return_tuplemerge_with_config_defaults)capture_outputs   )
BeitConfigz-
    Class for outputs of [`BeitModel`].
    )custom_introc                       \ rS rSrSrSrg)BeitModelOutputWithPooling0   a2  
pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
    Average of the last layer hidden states of the patch tokens (excluding the *[CLS]* token) if
    *config.use_mean_pooling* is set to True. If set to False, then the final hidden state of the *[CLS]* token
    will be returned.
 N)__name__
__module____qualname____firstlineno____doc____static_attributes__r$       `/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/beit/modeling_beit.pyr"   r"   0   s    r+   r"   c                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	BeitPatchEmbeddings?   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
configc                   > [         TU ]  5         UR                  nUR                  n[	        U[
        5      (       a  UOX"4n[	        U[
        5      (       a  UOX34nUS   US   -  US   US   -  -  U l        X l        X0l        UR                  U l        [        R                  " UR                  UR                  X3S9U l        g )Nr   r   kernel_sizestride)super__init__
image_size
patch_size
isinstancer   num_patchesnum_channelsr   Conv2dhidden_size
projection)selfr0   r7   r8   	__class__s       r,   r6   BeitPatchEmbeddings.__init__F   s    &&
&&
#-j(#C#CZ*Ia
#-j(#C#CZ*Ia
&qMZ]:z!}PZ[\P]?]^$$"//))F$7$79K9KYcwr+   pixel_valuesreturnc                     UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  U5      R	                  S5      R                  SS5      $ )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .   )shaper;   
ValueErrorr>   flatten	transpose)r?   rB   r;   s      r,   forwardBeitPatchEmbeddings.forwardS   ss    #))!,,,,!../yaI  |,44Q7AA!QGGr+   )r7   r;   r:   r8   r>   )r%   r&   r'   r(   r)   r   r6   torchr   rK   r*   __classcell__r@   s   @r,   r.   r.   ?   s:    xz xHELL HU\\ H Hr+   r.   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\	S	\	S\R                  4S
 jr
 SS\R                  S\R                  S-  S\R                  4S jjrSrU =r$ )BeitEmbeddings]   zZ
Construct the CLS token, position and patch embeddings. Optionally, also the mask token.
r0   rC   Nc                   > [         TU ]  5         [        R                  " [        R
                  " SSUR                  5      5      U l        UR                  (       a6  [        R                  " [        R
                  " SSUR                  5      5      OS U l	        [        U5      U l        UR                  U l        U R                  R                  nUR                  (       a9  [        R                  " [        R
                  " SUS-   UR                  5      5      OS U l        [        R                   " UR"                  5      U l        g )Nr   )r5   r6   r   	ParameterrM   zerosr=   	cls_tokenuse_mask_token
mask_tokenr.   patch_embeddingsr8   r:    use_absolute_position_embeddingsposition_embeddingsDropouthidden_dropout_probdropout)r?   r0   r:   r@   s      r,   r6   BeitEmbeddings.__init__b   s    ekk!Q8J8J&KLQWQfQf",,u{{1a9K9K'LMlp 3F ; ++++77 66 LLQa9K9KLM 	 
 zz&"<"<=r+   
embeddingsheightwidthc                    UR                   S   S-
  nU R                  R                   S   S-
  n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SS24   nU R                  SS2SS24   nUR                   S   nX R
                  -  n	X0R
                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SS	S
9nUR                  SSSS5      R                  SSU5      n[        R                  " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   Ng      ?r   r   rF   bicubicFsizemodealign_cornersdim)rG   r[   rM   jit
is_tracingr8   r   reshapepermuter   
functionalinterpolateviewcat)r?   r`   ra   rb   r:   num_positionsclass_pos_embedpatch_pos_embedrk   
new_height	new_widthsqrt_num_positionss               r,   interpolate_pos_encoding'BeitEmbeddings.interpolate_pos_encodingq   sS    !&&q)A-0066q9A= yy##%%+*F6?+++221bqb59221ab59r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nyy/;CCr+   rB   bool_masked_posc                    UR                   u    p4nU R                  U5      nUR                  5       u  pxnUbI  U R                  R	                  XxS5      n	UR                  S5      R                  U	5      n
USU
-
  -  X-  -   nU R                  R	                  USS5      n[        R                  " X4SS9nU R                  b  X`R                  XdU5      -   nU R                  U5      nU$ Nrd   r   rj   )rG   rY   rg   rX   expand	unsqueezetype_asrV   rM   rs   r[   rz   r^   )r?   rB   r|   _ra   rb   r`   
batch_sizeseq_lenmask_tokensmask
cls_tokenss               r,   rK   BeitEmbeddings.forward   s    
 +001e**<8
!+!2
Q&//00bIK",,R088ED#q4x0;3EEJ^^**:r2>
YY
7Q?
##/#&C&CJX]&^^J\\*-
r+   )rV   r^   rX   rY   r8   r[   N)r%   r&   r'   r(   r)   r   r6   rM   r   intrz   
BoolTensorrK   r*   rN   rO   s   @r,   rQ   rQ   ]   s    >z >d >&D5<< &D &DUX &D]b]i]i &DV 48ll ))D0 
	 r+   rQ   c                      ^  \ rS rSrS\SS4U 4S jjr\\" SS9S\\	\	4   S\
R                  4S	 j5       5       rSS
\S\
R                  4S jjrSrU =r$ )BeitRelativePositionBias   r0   rC   Nc                   > [         TU ]  5         UR                  n[        U[        [
        45      (       d  X"4nUS   UR                  -  US   UR                  -  4U l        SU R                  S   -  S-
  SU R                  S   -  S-
  -  S-   U l        [        R                  " [        R                  " U R                  UR                  5      5      U l        g Nr   r   rF   r   )r5   r6   r7   r9   tuplelistr8   window_sizenum_relative_distancer   rT   rM   rU   num_attention_headsrelative_position_bias_table)r?   r0   r7   r@   s      r,   r6   !BeitRelativePositionBias.__init__   s    &&
*udm44$1J&qMV->->>
1QWQbQb@bc&'$*:*:1*=&=&Aa$JZJZ[\J]F]`aFa%bef%f",.LLKK22F4N4NO-
)r+   
   )maxsizer   c                    SU S   -  S-
  SU S   -  S-
  -  S-   nU S   U S   -  n[         R                  " [         R                  " [         R                  " [         R                  " U S   5      [         R                  " U S   5      SS95      SS9nUSS2SS2S4   USS2SSS24   -
  R                  SSS5      R                  5       nUSS2SS2S4==   U S   S-
  -  ss'   USS2SS2S4==   U S   S-
  -  ss'   USS2SS2S4==   SU S   -  S-
  -  ss'   [         R                  " US-   4S-  UR                  S	9nUR                  S
5      USS2SS24'   US-
  USSS24'   US-
  USS2S4'   US-
  US'   U$ )z
This method creates the relative position index, modified to support arbitrary window sizes,
as introduced in [MiDaS v3.1](https://huggingface.co/papers/2307.14460).
rF   r   r   r   ij)indexing)	start_dimN)rg   dtyperd   )r   r   )
rM   rI   stackmeshgridarangero   
contiguousrU   r   sum)r   r   window_areacoords_flattenrelative_coordsrelative_position_indexs         r,    generate_relative_position_index9BeitRelativePositionBias.generate_relative_position_index   s    "#[^!3a!7AA<NQR<R SVW W!!n{1~5 KKu||KN'CU\\R]^_R`Ealpqr
 *!Q*5q$PQz8RR[[\]_`bcdooq1a KNQ$66 1a KNQ$66 1a AA$6$:: "'++K!O3E3IQ`QfQf"g*9*=*=b*AAB')>)B12&)>)BA&(=(A%&&r+   rz   c                    SU R                   S   -  S-
  nSU R                   S   -  S-
  nSUS   -  S-
  nSUS   -  S-
  nU R                  nU R                  n	Xg-  S-   n
USU	S-
   nUR                  SXTS5      R	                  SSSS5      n[
        R                  R                  U[        U5      [        U5      4SS9nUR	                  SSSS5      R                  U
S-
  S5      n[        R                  " XU	S-
  S /5      nU R                  U5      nXR                  S5         nUR                  US   US   -  S-   US   US   -  S-   S5      nUR	                  SSS5      R                  5       nU(       a?  [
        R                  R                  UR                  S5      X34SS	S
9R                  S5      nUR                  S5      $ )ze
Modification of timm.models.beit.py: Attention._get_rel_pos_bias to support arbitrary window sizes.
rF   r   r   r   Nrd   bilinear)rg   rh   Frf   )r   r   r   rn   ro   r   rp   rq   r   rM   rs   r   rr   r   r   squeeze)r?   r   rz   dim_size
old_height	old_widthrw   rx    old_relative_position_bias_tableold_num_relative_distancenew_num_relative_distanceold_sub_tablenew_sub_table new_relative_position_bias_tabler   relative_position_biass                   r,   rK    BeitRelativePositionBias.forward   s!    ))!,,q0
((++a/	Q'!+
A&*	+/+L+L($($>$>!$.$:Q$>!89X;TWX;XY%--aKSSTUWXZ[]^_11:!6	)8L MT^ 2 
 &--aAq9AAB[^_B_acd+099=VYZ=Z=\]^,
( #'"G"G"T!AB^B^_aBb!c "8!<!<N[^+a/Q+a.1PST1TVX"
 "8!?!?1a!H!S!S!U#%']]%>%>&003)#	 &? &
 gaj # &//22r+   )r   r   r   )FN)r%   r&   r'   r(   r   r6   staticmethodr   r   r   rM   r   r   boolrK   r*   rN   rO   s   @r,   r   r      sp    	
z 	
d 	
 (4'eCHo '%,, ' 5 '4-3T -3]b]i]i -3 -3r+   r   modulequerykeyvalueattention_maskscalingr^   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  US[        R                  S9R                  UR                  5      n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nrd         rF   r   )rk   r   )ptrainingr   )rg   rM   matmulrJ   r   rp   softmaxfloat32tor   r^   r   r   )
r   r   r   r   r   r   r^   r   attn_weightsattn_outputs
             r,   eager_attention_forwardr     s     **R.D( <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$r+   c                      ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S-  S\\	   S\
\R                  \R                  4   4S	 jjrS
rU =r$ )BeitAttentioni(  r0   c                   > [         TU ]  5         Xl        UR                  U l        [	        USUR
                  UR                  -  5      U l        UR                  U l        U R                  S-  U l	        SU l
        [        R                  " UR
                  UR                  U R                  -  5      U l        [        R                  " UR
                  UR                  U R                  -  SS9U l        [        R                  " UR
                  UR                  U R                  -  5      U l        [        R                  " UR                  U R                  -  UR
                  5      U l        g )Nhead_dimr   F)bias)r5   r6   r0   r   getattrr=   r   attention_probs_dropout_probattention_dropoutr   	is_causalr   Linearq_projk_projv_projo_projr?   r0   r@   s     r,   r6   BeitAttention.__init__)  s   #)#=#= 
F4F4F&JdJd4de!'!D!D}}d*ii 2 2F4N4NQUQ^Q^4^_ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^_ii : :T]] JFL^L^_r+   Nhidden_statesr   r   rC   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
U R#                  U
5      n
X4$ )Nrd   r   rF           )r^   r   )rG   r   r   rr   rJ   r   r   r   get_interfacer0   _attn_implementationr   r   r   r   rn   r   r   )r?   r   r   r   input_shapehidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   s               r,   rK   BeitAttention.forward6  sE    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r+   )
r   r0   r   r   r   r   r   r   r   r   r   )r%   r&   r'   r(   r   r6   rM   r   r   r   r   rK   r*   rN   rO   s   @r,   r   r   (  sk    `z `  /3)||) t+) +,	)
 
u||U\\)	*) )r+   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )BeitMLPiX  r0   c                   > [         TU ]  5         Xl        [        UR                     U l        [        R                  " UR                  UR                  5      U l
        [        R                  " UR                  UR                  5      U l        g r   )r5   r6   r0   r
   
hidden_actactivation_fnr   r   r=   intermediate_sizefc1fc2r   s     r,   r6   BeitMLP.__init__Y  sb    #F$5$5699V//1I1IJ99V55v7I7IJr+   r   rC   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r?   r   s     r,   rK   BeitMLP.forward`  s4    /**=9/r+   )r   r0   r   r   r%   r&   r'   r(   r   r6   rM   r   rK   r*   rN   rO   s   @r,   r   r   X  s1    Kz KU\\ ell  r+   r   c                      ^  \ rS rSrSrSS\SS4U 4S jjjrS\R                  S\R                  4S jr	S\
4S	 jrS
rU =r$ )BeitDropPathih  zStochastic depth (DropPath) per sample, for residual blocks.

Identity when ``drop_prob`` is 0 or outside training. See `Deep Networks with Stochastic Depth
<https://arxiv.org/abs/1603.09382>`_.
	drop_probrC   Nc                 .   > [         TU ]  5         Xl        g r   )r5   r6   r   )r?   r   r@   s     r,   r6   BeitDropPath.__init__o  s    "r+   r   c                 V   U R                   S:X  d  U R                  (       d  U$ SU R                   -
  nUR                  S   4SUR                  S-
  -  -   n[        R
                  " X1R                  UR                  S9n[        R                  " XB-   5      nUR                  U5      U-  $ )Nr   r   r   )r   )r   device)
r   r   rG   ndimrM   randr   r   floordiv)r?   r   	keep_probrG   random_tensors        r,   rK   BeitDropPath.forwards  s    >>S   &	$$Q')DM4F4F4J,KK

50C0CML`L`aM$=>  +m;;r+   c                      SU R                    3$ )Nzp=r   r?   s    r,   
extra_reprBeitDropPath.extra_repr|  s    DNN#$$r+   r  r   )r%   r&   r'   r(   r)   floatr6   rM   r   rK   strr  r*   rN   rO   s   @r,   r   r   h  sL    #% #$ # #<U\\ <ell <%C % %r+   r   c                      ^  \ rS rSrSrSS\S\4U 4S jjjr   SS\R                  S\R                  S-  S	\
S
\\\4   S-  S\\   S\R                  4S jjrSrU =r$ )	BeitLayeri  z?This corresponds to the Block class in the timm implementation.r0   drop_path_ratec                 R  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  S9U l        [        R
                  " UR                  UR                  S9U l	        [        U5      U l        [        R                  " UR                  5      U l        UR                  U l        US:  a  [!        U5      O[        R"                  " 5       U l        UR&                  nUS:  a6  [        R(                  " U[*        R,                  " UR                  5      -  SS9OSU l        US:  a6  [        R(                  " U[*        R,                  " UR                  5      -  SS9OSU l        UR2                  (       a  [5        U5      U l        g S U l        g )Nepsr   r   T)requires_gradg      ?)r5   r6   r   	attentionr   	LayerNormr=   layer_norm_epslayernorm_beforelayernorm_afterr   mlpr\   r]   r^   r8   r   Identity	drop_pathlayer_scale_init_valuerT   rM   oneslambda_1lambda_2use_relative_position_biasr   r   )r?   r0   r  init_valuesr@   s       r,   r6   BeitLayer.__init__  s@   &v. "V-?-?VEZEZ [!||F,>,>FDYDYZ6?zz&"<"<= ++9G#9Mn5SUS^S^S`33^ilm^mBLLuzz&2D2D'EEUYZsv 	 _jlm^mBLLuzz&2D2D'EEUYZsv 	 KQJkJk&>v&F#qu#r+   Nr   r   rz   
resolutionr   rC   c                    U R                   bF  Uu  pgX`R                  -  XpR                  -  4nU R                  XUR                  S   S9n	Ub  X-   OU	nUn
U R                  U5      nU R                  " U4SU0UD6u  pU R                  U5      nU R                  U-  nU R                  U5      U
-   nUn
U R                  U5      nU R                  U5      nU R                  U5      nU R                  U-  nU R                  U5      U
-   nU$ )Nr   )r   r   )r   r8   rG   r  r  r^   r  r  r  r  r  )r?   r   r   rz   r!  r   ra   rb   r   r   residualr   s               r,   rK   BeitLayer.forward  s6    &&2&MF!__4e6NOK%)%@%@@S@STU@V &A &" <J;U&7[q 
 !--m<>>
)
 

 ]35}5@ !,,];/]35}5@r+   )
r  r  r^   r  r  r  r  r  r8   r   r  NFN)r%   r&   r'   r(   r)   r   r	  r6   rM   r   r   r   r   r   r   rK   r*   rN   rO   s   @r,   r  r    s    Ivz v5 v v, /3).-1&||& t+& #'	&
 #s(Od*& +,& 
& &r+   r  c                      ^  \ rS rSr% \\S'   SrSrSrSr	S/r
SrSrSrSrSr\\S	.rS
rS/r\R,                  " 5       U 4S j5       rSrU =r$ )BeitPreTrainedModeli  r0   beitrB   )imageTr  F)r   
attentionsrY   z.*relative_position_index.*c                   > [         TU ]  U5        [        U[        5      (       a|  [        R
                  " UR                  5        UR                  b   [        R
                  " UR                  5        UR                  b!  [        R
                  " UR                  5        gg[        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a  [        UR                  [        R                  5      (       ak  [        R                  " UR                  U R                   R"                  5        [        R                  " UR$                  U R                   R"                  5        ggg)zInitialize the weightsN)r5   _init_weightsr9   rQ   initzeros_rV   rX   r[   r   r   r  r  r   rT   	constant_r0   r  r  )r?   r   r@   s     r,   r,  !BeitPreTrainedModel._init_weights  s     	f%fn--KK(()  ,F--.))5F667 6 899KK;;<	**&//2<<88v0R0RSv0R0RS 9 +r+   r$   )r%   r&   r'   r(   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backend_can_compile_fullgraphr  r   _can_record_outputs_input_embed_layer"_keys_to_ignore_on_load_unexpectedrM   no_gradr,  r*   rN   rO   s   @r,   r'  r'    s~    $O!&*#$N "&!"# ,*H)I&
]]_T Tr+   r'  c                      ^  \ rS rSrSS\S\SS4U 4S jjjr\\" SS9\	   SS	\
R                  S
\
R                  S-  S\S\
R                  S-  S\\   S\4S jj5       5       5       rSrU =r$ )	BeitModeli  r0   add_pooling_layerrC   Nc           
        > [         TU ]  U5        Xl        [        U5      U l        UR
                  (       a  [        U5      OSU l        [        UR                  5       Vs/ s H+  o1R                  U-  [        UR                  S-
  S5      -  PM-     nn[        R                  " U Vs/ s H  n[        XS9PM     sn5      U l        UR                   (       a  [        R"                  " 5       O([        R$                  " UR&                  UR(                  S9U l        U(       a  [-        U5      OSU l        U R1                  5         gs  snf s  snf )z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
Nr   )r  r  )r5   r6   r0   rQ   r`   !use_shared_relative_position_biasr   shared_position_biasrangenum_hidden_layersr  maxr   
ModuleListr  layersuse_mean_poolingr  r  r=   r  	layernorm
BeitPoolerpooler	post_init)r?   r0   rB  idrop_path_ratesrr@   s         r,   r6   BeitModel.__init__  s   
 	 (0060X0X$V,^b 	! W\\b\t\tVu
VuQR!!A%F,D,Dq,H!(LLVu 	 
 mmRa$bRaQYv%HRa$bc $44BKKM",,vGYGY_e_t_t:u 	 ->j(4 	
 %cs   !2E)EF)tie_last_hidden_statesrB   r|   rz   r   r   c                    U R                  XS9nUR                  SS n[        U R                  UUS9nU R                  bZ  Uu  pXR                  R
                  -  XR                  R
                  -  4n
U R	                  XUR                  S   S9nUb  X-   OUnUnU R                   H  nU" U4UUUS.UD6nM     U R                  U5      nU R                  b  U R                  U5      OSn[        XS9$ )	z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
)r|   rF   N)r0   inputs_embedsr   r   )rz   r   )r   rz   r!  )last_hidden_statepooler_output)
r`   rG   r   r0   rE  r8   rJ  rL  rN  r"   )r?   rB   r|   rz   r   r   embedding_outputr!  ra   rb   r   shared_relative_position_biasr   layersequence_outputpooled_outputs                   r,   rK   BeitModel.forward   s'     ??<?Y!''+
2;;*)
 $$0&MF![[%;%;;UkkF\F\=\]K,0,E,EYiYoYopqYr -F -)
 "- .>2  )[[E!-)A%	
 M ! ..78<8OO4UY)Oiir+   )r0   r`   rL  rJ  rN  rE  )Tr%  )r%   r&   r'   r(   r   r   r6   r   r   r   rM   r   r   r   r   r"   rK   r*   rN   rO   s   @r,   rA  rA    s    z d d  2  E2 48)..2-jll-j ))D0-j #'	-j
 t+-j +,-j 
$-j  3  -jr+   rA  c                   n   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	rM  i3  r0   rC   Nc                    > [         TU ]  5         UR                  (       a/  [        R                  " UR
                  UR                  S9U l        g S U l        g )Nr  )r5   r6   rK  r   r  r=   r  rL  r   s     r,   r6   BeitPooler.__init__4  sA    KQKbKbBLL++1F1FG 	hl 	r+   r   c                     U R                   b,  U R                  US S 2SS 2S S 24   R                  S5      5      $ US S 2S4   $ )Nr   r   )rL  meanr   s     r,   rK   BeitPooler.forward:  sD    BF..B\t~~mAqr1H5::1=>ubopqstptbuur+   )rL  r   rO   s   @r,   rM  rM  3  s:    
z 
d 
vU\\ vell v vr+   rM  a  
    Beit Model transformer with a 'language' modeling head on top. BEiT does masked image modeling by predicting
    visual tokens of a Vector-Quantize Variational Autoencoder (VQ-VAE), whereas other vision models like ViT and DeiT
    predict RGB pixel values. As a result, this class is incompatible with [`AutoModelForMaskedImageModeling`], so you
    will need to use [`BeitForMaskedImageModeling`] directly if you wish to do masked image modeling with BEiT.
    c                      ^  \ rS rSrS\SS4U 4S jjrS r\\     SS\	R                  S-  S\	R                  S-  S	\	R                  S-  S
\S\	R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )BeitForMaskedImageModelingi?  r0   rC   Nc                 @  > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  UR                  5      U l        U R                  5         g )NFrB  r  )r5   r6   
num_labelsrA  r(  r   r  r=   r  rL  r   
vocab_sizelm_headrO  r   s     r,   r6   #BeitForMaskedImageModeling.__init__H  su      ++f>	 f&8&8f>S>STyy!3!3V5F5FG 	r+   c                     g r   r$   r  s    r,   get_output_embeddings0BeitForMaskedImageModeling.get_output_embeddingsU  s    r+   rB   r|   labelsrz   r   r   c                 (   U R                   " U4UUUS.UD6nUR                  nU R                  U5      nU R                  USS2SS24   5      n	Sn
Ub   [        R
                  " 5       nU" X   U5      n
[        U
U	UR                  UR                  S9$ )a  
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoImageProcessor, BeitForMaskedImageModeling
>>> import torch
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
>>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")

>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
>>> pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()

>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
>>> loss, logits = outputs.loss, outputs.logits
>>> list(logits.shape)
[1, 196, 8192]
```)r|   rz   r   Nr   losslogitsr   r*  )	r(  rW  rL  rk  r   CrossEntropyLossr   r   r*  )r?   rB   r|   rp  rz   r   r   outputsr\  prediction_scoresmasked_lm_lossloss_fcts               r,   rK   "BeitForMaskedImageModeling.forwardX  s    X ))
+%=)	

 
 "33..9 LLAB)?@**,H%&7&H&QN$!//))	
 	
r+   )r(  rL  rk  ri  )NNNFN)r%   r&   r'   r(   r   r6   rn  r   r   rM   r   r   r   r   r   r   r   rK   r*   rN   rO   s   @r,   rf  rf  ?  s    z d   -137&*)..2@
llT)@
 ))D0@
 t#	@

 #'@
 t+@
 +,@
 
	@
  @
r+   rf  z
    Beit Model transformer with an image classification head on top (a linear layer on top of the average of the final
    hidden states of the patch tokens) e.g. for ImageNet.
    c                      ^  \ rS rSrS\SS4U 4S jjr\\   SS\R                  S-  S\R                  S-  S\
S	\\   S\\-  4
S
 jj5       5       rSrU =r$ )BeitForImageClassificationi  r0   rC   Nc                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        U R                  5         g )NTrh  r   )r5   r6   ri  rA  r(  r   r   r=   r  
classifierrO  r   s     r,   r6   #BeitForImageClassification.__init__  ss      ++f=	 OUN_N_bcNc"))F$6$68I8IJikititiv 	r+   rB   rp  rz   r   c                     U R                   " U4SU0UD6nUR                  nU R                  U5      nSnUb  U R                  X'U R                  5      n[        UUUR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
rz   Nrr  )r(  rX  r~  loss_functionr0   r   r   r*  )	r?   rB   rp  rz   r   rv  r]  rt  rs  s	            r,   rK   "BeitForImageClassification.forward  s     ))
%=
 
  --/%%fdkkBD$!//))	
 	
r+   )r(  r~  ri  NNF)r%   r&   r'   r(   r   r6   r   r   rM   r   r   r   r   r   r   rK   r*   rN   rO   s   @r,   r|  r|    s    
z 
d 
  -1&*).	 
llT) 
 t# 
 #'	 

 +, 
 
&	& 
   
r+   r|  c                      ^  \ rS rSr       SS\S\S\\\\4   -  S\S\\\\4   -  \-  S\S\\\\4   -  S	\S
\4U 4S jjjrS\	R                  S\	R                  4S jrSrU =r$ )BeitConvLayeri  in_channelsout_channelsr3   r4   paddingr   dilationgroups
activationc
                    > [         T
U ]  5         [        R                  " UUUUUUUUS9U l        [        R
                  " U5      U l        U	b  [        U	   U l	        g [        R                  " 5       U l	        g )N)r  r  r3   r4   r  r  r  r   )
r5   r6   r   r<   convolutionBatchNorm2dnormalizationr
   r  r  )r?   r  r  r3   r4   r  r   r  r  r  r@   s             r,   r6   BeitConvLayer.__init__  si     	99#%#	
  ^^L90:0F&,BKKMr+   r   rC   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r  r  r   s     r,   rK   BeitConvLayer.forward  s6    ((7**=96r+   )r  r  r  )r   r   r   Fr   r   relu)r%   r&   r'   r(   r   r   r
  r   r6   rM   r   rK   r*   rN   rO   s   @r,   r  r    s    
 .//0*+ ZZ Z 5c?*	Z
 Z uS#X&,Z Z c3h'Z Z Z Z4U\\ ell  r+   r  c                      ^  \ rS rSrS\S\S\SS4U 4S jjrS\R                  S	\\\4   S\R                  4S
 jr	Sr
U =r$ )BeitPyramidPoolingBlocki  
pool_scaler  channelsrC   Nc                 v   > [         TU ]  5         [        R                  " U5      U l        [        X#SS9U l        g )Nr   r3   )r5   r6   r   AdaptiveAvgPool2dpoolingr  conv)r?   r  r  r  r@   s       r,   r6    BeitPyramidPoolingBlock.__init__  s.    ++J7!+QG	r+   inputrg   c                     U R                  U5      nU R                  U5      n[        R                  R	                  X2SSS9nU$ )Nr   Frf   )r  r  r   rp   rq   )r?   r  rg   hidden_states       r,   rK   BeitPyramidPoolingBlock.forward  s@    ||E*yy.}}00zin0or+   )r  r  )r%   r&   r'   r(   r   r6   rM   r   r   rK   r*   rN   rO   s   @r,   r  r    sX    H3 HS HC HD H
U\\ sCx U\\  r+   r  c                      ^  \ rS rSrSrS\\S4   S\S\SS4U 4S	 jjrS
\R                  S\
\R                     4S jrSrU =r$ )BeitPyramidPoolingModulei  aK  
Pyramid Pooling Module (PPM) used in PSPNet.

Args:
    pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
        Module.
    in_channels (int): Input channels.
    channels (int): Channels after modules, before conv_seg.

Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.
pool_scales.r  r  rC   Nc                    > [         TU ]  5         Xl        X l        X0l        [
        R                  " U Vs/ s H  n[        XBUS9PM     sn5      U l        g s  snf )N)r  r  r  )	r5   r6   r  r  r  r   rI  r  blocks)r?   r  r  r  r  r@   s        r,   r6   !BeitPyramidPoolingModule.__init__  sY    && mm #."-J (:aij"-
s   Ar   c                 r    UR                  5       SS  nU R                   Vs/ s H  o3" XS9PM
     sn$ s  snf )NrF   )rg   )rg   r  )r?   r   original_sizeblocks       r,   rK    BeitPyramidPoolingModule.forward  s8    %**,QR0FJkkRkUm8kRRRs   4)r  r  r  r  )r%   r&   r'   r(   r)   r   r   r6   rM   r   r   rK   r*   rN   rO   s   @r,   r  r    s\    


E#s(O 

# 

QT 

Y] 

SU\\ Sd5<<6H S Sr+   r  c                      ^  \ rS rSrSrS\SS4U 4S jjrS\\R                     S\R                  4S jr
S	\\R                     S\R                  4S
 jrSrU =r$ )BeitUperHeadi"  z
Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
[UPerNet](https://huggingface.co/papers/1807.10221).

Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.
r0   rC   Nc           
        > [         TU ]  5         UR                  U l        UR                  /S-  U l        UR                  U l        [        R                  " U R
                  UR                  SS9U l	        [        U R                  U R                  S   U R
                  5      U l        [        U R                  S   [        U R                  5      U R
                  -  -   U R
                  SSS9U l        [        R                  " 5       U l        [        R                  " 5       U l        U R                  S S  Hi  nU R                   R%                  [        X R
                  SS95        U R"                  R%                  [        U R
                  U R
                  SSS95        Mk     [        [        U R                  5      U R
                  -  U R
                  SSS9U l        g )N   r   r  rd   r   r3   r  )r5   r6   r  r=   r  r  r   r<   ri  r~  r  psp_modulesr  lenpsp_bottleneckrI  lateral_convs	fpn_convsappendfpn_bottleneck)r?   r0   r  r@   s      r,   r6   BeitUperHead.__init__*  s|   !--"../!3**))DMM63D3DRST 4R MM

 ,R 3t'7'7#84==#HHMM	
  ]]_++CR0K%%mK\]&^_NN!!-t}}Z[ef"gh 1 ,  !DMM1MM	
r+   r   c                 |    US   n[         R                  " U/U R                  U5      QSS9nU R                  U5      $ r~   )rM   rs   r  r  )r?   r   r  s      r,   psp_forwardBeitUperHead.psp_forwardL  sA    $R(yy,!P1A1A,1O!PVWX""<00r+   encoder_hidden_statesc           	      &   / n[        U R                  U5       H  u  p4UR                  U" U5      5        M     UR                  U R                  U5      5        [	        U5      n[        US-
  SS5       HF  nX&S-
     R                  SS  nX&S-
     [        R                  R                  X&   USSS9-   X&S-
  '   MH     / n[        US-
  5       H)  nUR                  U R                  U   " X&   5      5        M+     UR                  US   5        [        US-
  SS5       H7  n[        R                  R                  X   US   R                  SS  SSS9X'   M9     [        R                  " USS9nU R                  U5      n	U R                  U	5      n	U	$ )	Nr   r   rd   rF   r   Frf   rj   )zipr  r  r  r  rF  rG   r   rp   rq   r  rM   rs   r  r~  )
r?   r  lateralslateral_convr  used_backbone_levelsrP  
prev_shapefpn_outsoutputs
             r,   rK   BeitUperHead.forwardQ  s   *-d.@.@BW*X&LOOL67 +Y 	(()>?@  #8}+a/B7A!a%..qr2J&1uo0I0I*:U 1J 1 HUO 8 +a/0AOODNN1-hk:; 1 	%+a/B7A--33(1+"3"3AB"7jX] 4 HK 8 99X1-$$X.(r+   )	r  r~  r  r  r  r  r  r  r  )r%   r&   r'   r(   r)   r   r6   r   rM   r   r  rK   r*   rN   rO   s   @r,   r  r  "  sa     
z  
d  
D1ell); 1 1
T%,,-? ELL  r+   r  c                      ^  \ rS rSrSr SS\S\S\S\\\\4   -  SS4
U 4S	 jjjrS
\	\
R                     S\
R                  4S jrSrU =r$ )BeitFCNHeadis  a  
Fully Convolution Networks for Semantic Segmentation. This head is implemented of
[FCNNet](https://huggingface.co/papers/1411.4038>).

Args:
    config (BeitConfig): Configuration.
    in_channels
    kernel_size (int): The kernel size for convs in the head. Default: 3.
    dilation (int): The dilation rate for convs in the head. Default: 1.


Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.
r0   in_indexr3   r  rC   Nc                 &  > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        UR                  U l	        X l
        US-  U-  n[        R                  " 5       U l        U R                  S:  a  U R                  R                  [        U R                  U R
                  X5US95        [!        U R                  S-
  5       H=  nU R                  R                  [        U R
                  U R
                  UUUS95        M?     U R                  (       a4  [        U R                  U R
                  -   U R
                  X3S-  S9U l        [        R$                  " U R
                  UR&                  SS9U l        g )NrF   r   )r3   r  r  r   r  r  )r5   r6   r=   r  auxiliary_channelsr  auxiliary_num_convs	num_convsauxiliary_concat_inputconcat_inputr  r   rI  convsr  r  rF  conv_catr<   ri  r~  )r?   r0   r  r3   r  conv_paddingr   r@   s          r,   r6   BeitFCNHead.__init__  sD    	!--1133"99 #q(H4]]_
>>AJJ$$dmmmu
 4>>A-.

!!!$/ ,!) / )  4==0$--[qrbrDM ))DMM63D3DRSTr+   r  c                     XR                      nUnU R                   H  nU" U5      nM     U R                  (       a%  U R                  [        R
                  " X#/SS95      nU R                  U5      nU$ )Nr   rj   )r  r  r  r  rM   rs   r~  )r?   r  r#  r   r  s        r,   rK   BeitFCNHead.forward  se    (7 JJD /M  MM%))X4MST*UVM6r+   )r  r~  r  r  r  r  r  r  )rF   r   r   )r%   r&   r'   r(   r)   r   r   r   r6   r   rM   r   rK   r*   rN   rO   s   @r,   r  r  s  s     no!U !U,/!UBE!UUX[`adfiai[jUj!U	!U !UFT%,,-? ELL  r+   r  c            	       ~   ^  \ rS rSrSrSS\S\S\SS4U 4S jjjrS	\R                  S\R                  4S
 jr	Sr
U =r$ )BeitFPNUpBlocki  uE   4x upsampling block: ConvTranspose → BN → GELU → ConvTranspose.r=   r3   r4   rC   Nc                    > [         TU ]  5         [        R                  " XX#S9U l        [        R
                  " U5      U l        [        R                  " 5       U l        [        R                  " XX#S9U l	        g )Nr2   )
r5   r6   r   ConvTranspose2dconv_transpose1r  r  GELUr  conv_transpose2)r?   r=   r3   r4   r@   s       r,   r6   BeitFPNUpBlock.__init__  sX    !11+Xcs^^K8'')!11+Xcsr+   r   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r  r  r  r   s     r,   rK   BeitFPNUpBlock.forward  sF    ,,];**=96,,];r+   )r  r  r  r  )rF   rF   )r%   r&   r'   r(   r)   r   r6   rM   r   rK   r*   rN   rO   s   @r,   r  r    sS    OtC tc ts tSW t tU\\ ell  r+   r  c                      ^  \ rS rSrSrS\4U 4S jjrS\\R                  S4   S\\R                  S4   4S jr
S	rU =r$ )
BeitFPNNecki  z
4-level feature pyramid neck for BeiT. Produces x4 upsample, x2 upsample,
identity, and x2 downsample outputs from the four selected ViT feature maps.
r0   c                    > [         TU ]  5         [        UR                  5      U l        [
        R                  " UR                  UR                  SSS9U l        [
        R                  " SSS9U l	        g )NrF   r2   )
r5   r6   r  r=   fpn1r   r  fpn2	MaxPool2dfpn4r   s     r,   r6   BeitFPNNeck.__init__  sX    "6#5#56	&&v'9'96;M;M[\efg	LLQq9	r+   feature_maps.rC   c                     U R                  US   5      U R                  US   5      US   U R                  US   5      4$ r   r  r  r  )r?   r  s     r,   rK   BeitFPNNeck.forward  sC    IIl1o&IIl1o&OIIl1o&	
 	
r+   r  )r%   r&   r'   r(   r)   r   r6   r   rM   r   rK   r*   rN   rO   s   @r,   r  r    sI    
:z :
E%,,*;$< 
u||UXGXAY 
 
r+   r  c                      ^  \ rS rSrS\SS4U 4S jjr\\\   SS\	R                  S-  S\	R                  S-  S\S	\\   S\\-  4
S
 jj5       5       5       rSrU =r$ )BeitForSemanticSegmentationi  r0   rC   Nc                 f  > [         TU ]  U5        UR                  U l        [        USS9U l        [        U R                  R                  5      S:w  a  [        S5      e[        U5      U l
        [        U5      U l        UR                  (       a  [        U5      OS U l        U R!                  5         g )NFrh  r  zBeitForSemanticSegmentation requires config.out_indices to be a list of 4 integers, specifying which features to use from the backbone. One can use [3, 5, 7, 11] in case of a base-sized architecture.)r5   r6   ri  rA  r(  r  r0   out_indicesrH   r  fpnr  decode_headuse_auxiliary_headr  auxiliary_headrO  r   s     r,   r6   $BeitForSemanticSegmentation.__init__  s      ++f>	t{{&&'1,- 
 v& (/5;5N5Nk&1TX 	r+   rB   rp  rz   r   c                   ^^^^ Ub%  U R                   R                  S:X  a  [        S5      eU R                  " U4SU0UD6nUR                  mUR
                  u  mpgnXpR                   R                  -  mXR                   R                  -  m[        UUUU4S jU R                   R                   5       5      n	U R                  U	5      n	U R                  U	5      n
SnU R                  b  U R                  U	5      nSnUb;  U R                  U
UU R                   R                  UU R                   R                  S9n[        UU
UR                  UR                   S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
    Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoImageProcessor, BeitForSemanticSegmentation
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")
>>> model = BeitForSemanticSegmentation.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")

>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
```Nr   z/The number of labels should be greater than onerz   c              3      >#    U  H8  nTUS -
     SS2S S24   R                  S S5      R                  TSTT5      v   M:     g7f)r   NrF   rd   )rJ   rn   ).0rP  r   r  patch_heightpatch_widths     r,   	<genexpr>6BeitForSemanticSegmentation.forward.<locals>.<genexpr>  sP      
, "!a%(AB/99!Q?GG
TVXdfqrr,s   A A)ignore_indexauxiliary_logitsauxiliary_loss_weightrr  )r0   ri  rH   r(  r   rG   r8   r   r  r  r  r  r  semantic_loss_ignore_indexr  r   r*  )r?   rB   rp  rz   r   rv  r   ra   rb   r  rt  r  rs  r   r  r  r  s                @@@@r,   rK   #BeitForSemanticSegmentation.forward  s`   B $++"8"8A"=NOO))
%=
 
 !( 5 5'3'9'9$
Au!7!77{{555  
[[,,
 
 xx-!!,/*#22<@%%![[CC!1&*kk&G&G & D '!//))	
 	
r+   )r  r(  r  r  ri  r  )r%   r&   r'   r(   r   r6   r   r   r   rM   r   r   r   r   r   r   rK   r*   rN   rO   s   @r,   r  r    s    z d *   -1&*).	G
llT)G
 t#G
 #'	G

 +,G
 
(	(G
  ! G
r+   r  zM
    BEiT backbone, to be used with frameworks like DETR and MaskFormer.
    c            	       b   ^  \ rS rSrU 4S jr\\\S\S\	\
   S\4S j5       5       5       rSrU =r$ )BeitBackbonei;  c                 F  > [         TU ]  U5        [        UR                  S-   5       Vs/ s H  o!R                  PM     snU l        [        USS9U l        UR                  (       a  [        U5      O[        R                  " 5       U l        U R                  5         g s  snf )Nr   Frh  )r5   r6   rF  rG  r=   num_featuresrA  r(  add_fpnr  r   r  r  rO  )r?   r0   r   r@   s      r,   r6   BeitBackbone.__init__A  sx     9>v?W?WZ[?[9\]9\A//9\]f>	*0..;v&bkkm 	 ^s   BrB   r   rC   c                 (   UR                   u  p4pVXPR                  R                  -  nX`R                  R                  -  nU R                  " U40 UD6n	U	R                  n
Sn[        U R                  U
5       Hi  u  pXR                  ;   d  M  U R                  R                  (       a3  USS2SS2SS24   nUR                  SS5      nUR                  USXx5      nX4-  nMk     U R                  U5      n[        UU	R                  U	R                  S9$ )a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
>>> model = AutoBackbone.from_pretrained(
...     "microsoft/beit-base-patch16-224", out_features=["stage1", "stage2", "stage3", "stage4"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)
>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 768, 14, 14]
```r$   Nr   rF   rd   )r  r   r*  )rG   r0   r8   r(  r   r  stage_namesout_featuresreshape_hidden_statesrJ   rn   r  r   r*  )r?   rB   r   r   r   ra   rb   r  r  rv  r   r  stager  s                 r,   rK   BeitBackbone.forwardK  s   @ (4'9'9$
v!7!77{{555))L3F3--#&t'7'7#GE)));;44#/12q#9L#/#9#9!Q#?L#/#7#7
B#bL/ $H xx-%!//))
 	
r+   )r(  r  r  )r%   r&   r'   r(   r6   r   r   r   r   r   r   r   rK   r*   rN   rO   s   @r,   r  r  ;  sN      3
3
 +,3
 
	3
  ! 3
r+   r  )r|  rf  r  rA  r'  r  )Nr   )Fcollections.abcr   r   dataclassesr   rM   r   r    r	   r-  activationsr
   backbone_utilsr   r   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_beitr   r"   Moduler.   rQ   r   r	  r   r   r   r   r  r'  rA  rM  rf  r|  r  r  r  r  r  r  r  r  r  __all__r$   r+   r,   <module>r     s  * / !   & ! H 6 9  G & @ B B I 5 * 
 !;  H")) H<SRYY SlV3ryy V3~ !%II%<<% 
% <<	%
 LL4'% T\% % '(%8-)BII -)`bii  %299 %0<* <~ "T/ "T "TJ Jj# Jj JjZ	v 	v S
!4 S
S
l /
!4 /
/
dBII D
bii 
Sryy S<N299 Nb:")) :zRYY $
")) 
* `
"5 `
 `
F 
A
="5 A

A
Hr+   