
    3j1H                        S SK JrJr  S SKrS SKJr  SSKJr  SSKJ	r	  SSK
JrJr  SSKJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  SSKJrJrJr  SSKJrJ r   SSK!J"r"  SSK#J$r$   " S S\RJ                  5      r& " S S\RJ                  5      r'  S0S\RJ                  S\RP                  S\RP                  S\RP                  S\RP                  S-  S\)S-  S\)S\\   4S jjr* " S S \RJ                  5      r+ " S! S"\RJ                  5      r, " S# S$\RJ                  5      r- " S% S&\5      r.\ " S' S(\5      5       r/\ " S) S*\/5      5       r0\" S+S,9 " S- S.\\/5      5       r1/ S/Qr2g)1    )CallableIterableN)nn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)create_bidirectional_mask)GradientCheckpointingLayer)BackboneOutputBaseModelOutputBaseModelOutputWithPooling)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstring
is_tracing)can_return_tuplemerge_with_config_defaults)capture_outputs   )PixioConfigc                   n   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	PixioPatchEmbeddings(   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
configc                   > [         TU ]  5         UR                  nUR                  n[	        U[
        5      (       a  UOX"4n[	        U[
        5      (       a  UOX34nUS   US   -  US   US   -  -  U l        X l        X0l        UR                  U l        [        R                  " UR                  UR                  X3S9U l        g )Nr   r   )kernel_sizestride)super__init__
image_size
patch_size
isinstancer   num_patchesnum_channelsr   Conv2dhidden_size
projection)selfr   r$   r%   	__class__s       b/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/pixio/modeling_pixio.pyr#   PixioPatchEmbeddings.__init__/   s    &&
&&
#-j(#C#CZ*Ia
#-j(#C#CZ*Ia
&qMZ]:z!}PZ[\P]?]^$$"//))F$7$79K9KYcw    pixel_valuesreturnc                     UR                   S   nX R                  :w  a  [        SU R                   SU S35      eU R                  U5      R	                  S5      R                  SS5      $ )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .   )shaper(   
ValueErrorr+   flatten	transpose)r,   r1   r(   s      r.   forwardPixioPatchEmbeddings.forward<   ss    #))!,,,,!../yaI  |,44Q7AA!QGGr0   )r$   r(   r'   r%   r+   )__name__
__module____qualname____firstlineno____doc__r   r#   torchTensorr:   __static_attributes____classcell__r-   s   @r.   r   r   (   s:    x{ xHELL HU\\ H Hr0   r   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\	S	\	S\R                  4S
 jr
S\R                  S\R                  4S jrSrU =r$ )PixioEmbeddingsF   z:
Construct the CLS tokens, position and patch embeddings.
r   r2   Nc                 .  > [         TU ]  5         [        R                  " [        R
                  " SUR                  UR                  5      5      U l        S U l	        [        U5      U l        U R                  R                  n[        R                  " [        R
                  " SX!R                  -   UR                  5      5      U l        [        R                  " UR                  5      U l        UR                  U l        UR"                  U l        Xl        g )Nr   )r"   r#   r   	ParameterrA   randnn_cls_tokensr*   	cls_token
mask_tokenr   patch_embeddingsr'   position_embeddingsDropouthidden_dropout_probdropoutr%   r   )r,   r   r'   r-   s      r.   r#   PixioEmbeddings.__init__K   s    ekk!V5H5H&J\J\&]^ 4V <++77#%<<A{M`M`?`bhbtbt0u#v zz&"<"<="// ++r0   
embeddingsheightwidthc                 0   UR                   S   U R                  -
  nU R                  R                   S   U R                  -
  n[        5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SU R                  24   nU R                  SS2U R                  S24   nUR                   S   nX R                  -  n	X0R                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      nUR                  n[        R                  R                  UR                  [        R                  5      X4SS	S
9R                  US9nUR                  SSSS5      R                  SSU5      n[        R                   " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher
resolution images. This method is also adapted to support tracing and interpolation at torch.float32 precision.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   Ng      ?r   r   r5   bicubicF)sizemodealign_cornersdtypedim)r6   rL   rP   r   r%   intreshapepermuter_   r   
functionalinterpolatetorA   float32viewcat)r,   rU   rV   rW   r'   num_positionsclass_pos_embedpatch_pos_embedra   
new_height	new_widthsqrt_num_positionstarget_dtypes                r.   interpolate_pos_encoding(PixioEmbeddings.interpolate_pos_encodingX   s    !&&q)D,=,==0066q9D<M<MM|| <+++2216I8I8I6I3IJ221d6G6G6I3IJr".
__,	 !34)11!5G]`a)11!Q1=&,,--33u}}-(	 4 

 "<"
  	 *11!Q1=BB1b#Nyy/;CCr0   r1   c                 d   UR                   u  p#pEU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      n[        R                  " X4SS9nXpR                  XtU5      -   nU R                  U5      nU$ )Nr^   rY   r   r`   )r6   rO   r+   weightr_   rg   rM   expandrA   rj   rr   rS   )	r,   r1   
batch_size_rV   rW   rq   rU   
cls_tokenss	            r.   r:   PixioEmbeddings.forward~   s    '3'9'9$
v,,77>>DD**<???+NO
^^**:r2>
YY
7Q?
"?"?
TY"ZZ
\\*-
r0   )rM   r   rS   rN   rL   rO   r%   rP   )r<   r=   r>   r?   r@   r   r#   rA   rB   rb   rr   r:   rC   rD   rE   s   @r.   rG   rG   F   sn    { t $D5<< $D $DUX $D]b]i]i $DLELL U\\  r0   rG   modulequerykeyvalueattention_maskscalingrS   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  US[        R                  S9R                  UR                  5      n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )NrY         r5   r   )ra   r_   )ptrainingr   )r[   rA   matmulr9   r   re   softmaxrh   rg   r_   rS   r   
contiguous)
r{   r|   r}   r~   r   r   rS   r   attn_weightsattn_outputs
             r.   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$r0   c                      ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S-  S\\	   S\
\R                  \R                  4   4S	 jjrS
rU =r$ )PixioAttention   r   c                   > [         TU ]  5         Xl        UR                  U l        [	        USUR
                  UR                  -  5      U l        UR                  U l        U R                  S-  U l	        SU l
        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  SS9U l        g )Nhead_dimr   FbiasT)r"   r#   r   num_attention_headsgetattrr*   r   attention_probs_dropout_probattention_dropoutr   	is_causalr   Linearqkv_biasq_projk_projv_projo_projr,   r   r-   s     r.   r#   PixioAttention.__init__   s*   #)#=#= 
F4F4F&JdJd4de!'!D!D}}d*ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eijr0   Nhidden_statesr   r   r2   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
U R#                  U
5      n
X4$ )NrY   r   r5           )rS   r   )r6   r   r   ri   r9   r   r   r   get_interfacer   _attn_implementationr   r   r   r   rc   r   r   )r,   r   r   r   input_shapehidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   s               r.   r:   PixioAttention.forward   sE    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r0   )
r   r   r   r   r   r   r   r   r   r   N)r<   r=   r>   r?   r   r#   rA   rB   r   r   tupler:   rC   rD   rE   s   @r.   r   r      sk    k{ k" /3)||) t+) +,	)
 
u||U\\)	*) )r0   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )PixioMLP   r2   c                 z  > [         TU ]  5         UR                  =p#[        UR                  UR                  -  5      n[
        R                  " X$SS9U l        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [
        R                  " XCSS9U l        g )NTr   )r"   r#   r*   rb   	mlp_ratior   r   fc1r&   
hidden_actstrr   
activationfc2)r,   r   in_featuresout_featureshidden_featuresr-   s        r.   r#   PixioMLP.__init__   s    %+%7%77f0063C3CCD99[Ef''--$V%6%67DO$//DO99_Fr0   hidden_statec                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   )r,   r   s     r.   r:   PixioMLP.forward   s2    xx-|4xx-r0   )r   r   r   )r2   N)
r<   r=   r>   r?   r#   rA   rB   r:   rC   rD   rE   s   @r.   r   r      s)    	GELL U\\  r0   r   c                      ^  \ rS rSrSrSS\SS4U 4S jjjrS\R                  S\R                  4S jr	S\
4S	 jrS
rU =r$ )PixioDropPath   zStochastic depth (DropPath) per sample, for residual blocks.

Identity when ``drop_prob`` is 0 or outside training. See `Deep Networks with Stochastic Depth
<https://arxiv.org/abs/1603.09382>`_.
	drop_probr2   Nc                 .   > [         TU ]  5         Xl        g r   )r"   r#   r   )r,   r   r-   s     r.   r#   PixioDropPath.__init__   s    "r0   r   c                 V   U R                   S:X  d  U R                  (       d  U$ SU R                   -
  nUR                  S   4SUR                  S-
  -  -   n[        R
                  " X1R                  UR                  S9n[        R                  " XB-   5      nUR                  U5      U-  $ )Nr   r   r   )r   )r_   device)
r   r   r6   ndimrA   randr_   r   floordiv)r,   r   	keep_probr6   random_tensors        r.   r:   PixioDropPath.forward   s    >>S   &	$$Q')DM4F4F4J,KK

50C0CML`L`aM$=>  +m;;r0   c                      SU R                    3$ )Nzp=r   )r,   s    r.   
extra_reprPixioDropPath.extra_repr  s    DNN#$$r0   r   )r   )r<   r=   r>   r?   r@   floatr#   rA   rB   r:   r   r   rC   rD   rE   s   @r.   r   r      sL    #% #$ # #<U\\ <ell <%C % %r0   r   c            	          ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S-  S\\	   S\R                  4S	 jjr
S
rU =r$ )
PixioLayeri  r   c                   > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  S9U l        [        R
                  " UR                  UR                  S9U l	        [        U5      U l        [        R                  " UR                  5      U l        UR                  S:  a  [!        UR                  5      U l        g [        R"                  " 5       U l        g )Nepsr   )r"   r#   r   	attentionr   	LayerNormr*   layer_norm_epslayernorm_beforelayernorm_afterr   mlprQ   rR   rS   drop_path_rater   Identity	drop_pathr   s     r.   r#   PixioLayer.__init__  s    '/ "V-?-?VEZEZ [!||F,>,>FDYDYZF#zz&"<"<=AGAVAVY\A\v'<'<=bdbmbmbor0   Nr   r   r   r2   c                 2   UnU R                  U5      nU R                  " X40 UD6u  pU R                  U5      nU R                  U5      U-   nUnU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU$ r   )r   r   rS   r   r   r   )r,   r   r   r   residualrx   s         r.   r:   PixioLayer.forward  s     !--m<>>-R6R]3}5@ ,,];/]3}5@r0   )r   r   rS   r   r   r   r   )r<   r=   r>   r?   r   r#   rA   rB   r   r   r:   rC   rD   rE   s   @r.   r   r     s]    p{ p /3|| t+ +,	
 
 r0   r   c                      ^  \ rS rSr% \\S'   SrSrSrSr	SS/r
SrSrSrSrSr\\S	.rS
r\R*                  " 5       U 4S j5       rSrU =r$ )PixioPreTrainedModeli$  r   pixior1   )imageTrG   r   )r   
attentionsrO   c                   > [         TU ]  U5        [        U[        5      (       a  UR                  b4  [
        R                  " UR                  SU R                  R                  S9  [
        R                  " UR                  SU R                  R                  S9  UR                  b!  [
        R                  " UR                  5        ggg)zInitialize the weightsNr   )meanstd)r"   _init_weightsr&   rG   rP   inittrunc_normal_r   initializer_rangerM   rN   zeros_)r,   r{   r-   s     r.   r   "PixioPreTrainedModel._init_weights7  s     	f%fo..))5""6#=#=CT[[MjMjkv//ct{{?\?\]  ,F--. -	 /r0    )r<   r=   r>   r?   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backend_can_compile_fullgraphr   r   _can_record_outputs_input_embed_layerrA   no_gradr   rC   rD   rE   s   @r.   r   r   $  sv    $O!&*#*L9N"&!#$ ,
]]_/ /r0   r   c                      ^  \ rS rSrS\4U 4S jjr\\" SS9\  SS\	R                  S-  S\	R                  S-  S	\\   S
\4S jj5       5       5       rSrU =r$ )
PixioModeliC  r   c                 f  > [         TU ]  U5        Xl        [        U5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l
        [
        R                  " UR                  UR                  S9U l        U R                  5         g s  snf )Nr   )r"   r#   r   rG   rU   r   
ModuleListrangenum_hidden_layersr   layersr   r*   r   	layernorm	post_initr,   r   rx   r-   s      r.   r#   PixioModel.__init__E  s     )&1mmvG_G_A`$aA`AZ%7A`$abf&8&8f>S>ST	 %bs   B.F)tie_last_hidden_statesNr1   r   r   r2   c                 :   Uc  [        S5      eU R                  U5      n[        U R                  UUS9nUnU R                   H  nU" XR40 UD6nM     U R                  U5      nUS S 2S U R                  R                  2S S 24   R                  SS9n[        UUS9$ )Nz You have to specify pixel_values)r   inputs_embedsr   r   r`   )last_hidden_statepooler_output)	r7   rU   r   r   r  r	  rL   r   r   )r,   r1   r   r   embedding_outputr   layerpooled_outputs           r.   r:   PixioModel.forwardP  s     ?@@??<82;;*)

 )[[E!-J6JM !}5%a)G4??+G+G)G&JKPPUVPW)+'
 	
r0   )r   rU   r	  r  )NN)r<   r=   r>   r?   r   r#   r   r   r   rA   rB   r   r   r   r:   rC   rD   rE   s   @r.   r  r  C  s~    	{ 	  E2 -1.2
llT)
 t+
 +,	

 
$
  3  
r0   r  zN
    Pixio backbone, to be used with frameworks like DETR and MaskFormer.
    )custom_introc                      ^  \ rS rSrS\4U 4S jjr\\\ SS\	R                  S\	R                  S-  S\\   S\4S	 jj5       5       5       rS
rU =r$ )PixioBackbonein  r   c                 8  > [         TU ]  U5        [        UR                  S-   5       Vs/ s H  o!R                  PM     snU l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U R                  5         g s  snf )Nr   r   )r"   r#   r  r  r*   num_featuresr  r   r   r   r   r	  r
  r  s      r.   r#   PixioBackbone.__init__t  sx     9>v?W?WZ[?[9\]9\A//9\]'
f&8&8f>S>ST	 ^s   BNr1   r   r   r2   c                    SUS'   U R                   " X40 UD6nUR                  n/ n[        U R                  U5       H  u  pxXpR                  ;   d  M  U R
                  R                  (       a  U R                  U5      nU R
                  R                  (       a  USS2U R                   R                  R                  S24   nUR                  u  ppU R
                  R                  nUR                  XU-  X-  S5      nUR                  SSSS5      R                  5       nUR!                  U5        M     [#        [%        U5      UR                  UR&                  S	9$ )
a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> processor = AutoImageProcessor.from_pretrained("facebook/pixio-huge")
>>> model = AutoBackbone.from_pretrained(
...     "facebook/pixio-huge", out_features=["stage7", "stage15", "stage23", "stage31"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)
>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 1280, 16, 16]
```Toutput_hidden_statesNrY   r   r   r   r5   )feature_mapsr   r   )r   r   zipstage_namesr   r   apply_layernormr	  reshape_hidden_statesrU   rL   r6   r%   rc   rd   r   appendr   r   r   )r,   r1   r   r   outputr   r  stager   rw   rx   rV   rW   r%   s                 r.   r:   PixioBackbone.forward}  s7   F *.%&"&**\"TV"T,,#&t'7'7#GE)));;..#'>>,#?L;;44#/4::3H3H3U3U3W0W#XL3?3E3E0J6!%!7!7J#/#7#7
jDXZ_Zmoq#rL#/#7#71a#C#N#N#PL##L1 $H |, ..((
 	
r0   )r	  r  r   r   )r<   r=   r>   r?   r   r#   r   r
   r   rA   rB   r   r   r   r:   rC   rD   rE   s   @r.   r  r  n  sq    {    /36
ll6
 t+6
 +,	6

 
6
  ! 6
r0   r  )r  r   r  )Nr   )3collections.abcr   r   rA   r    r   r   activationsr   backbone_utilsr	   r
   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_pixior   Moduler   rG   rB   r   r   r   r   r   r   r   r  r  __all__r   r0   r.   <module>r6     s  * /   & ! H 6 9 [ [ F & C C I 5 ,H299 H<Dbii DZ !%II%<<% 
% <<	%
 LL4'% T\% % '(%8.)RYY .)bryy &%BII %0+ > /? / /< '
% '
 '
T 
C
M#7 C

C
L Br0   