
    3j.                        S r SSKrSSKJr  SSKJr  SSKJrJr  SSKJ	r	  SSK
JrJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJ r J!r!J"r"J#r#  \" SS9\ " S S\5      5       5       r$ " S S\"5      r% " S S\RL                  5      r' " S S\ 5      r( " S S\5      r) " S S\5      r* " S  S!\!5      r+ " S" S#\#5      r,\ " S$ S%\,5      5       r-\" S&S'9 " S( S)\\,5      5       r./ S*Qr/g)+zPyTorch Pixio model.    N)strict)nn   )BackboneMixinfilter_output_hidden_states)create_bidirectional_mask)BackboneOutputBaseModelOutputBaseModelOutputWithPooling)Unpack)TransformersKwargsauto_docstring
is_tracing)can_return_tuplemerge_with_config_defaults)capture_outputs   )Dinov2Config)	Dinov2MLP)SwinDropPath)ViTAttentionViTLayerViTPatchEmbeddingsViTPreTrainedModelzfacebook/pixio-huge)
checkpointc                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\\   -  \\\4   -  \S'   Sr\\\   -  \\\4   -  \S'   \" 5       r\" 5       r\" 5       rSrg)PixioConfig!   a  
apply_layernorm (`bool`, *optional*, defaults to `True`):
    Whether to apply layer normalization to the feature maps in case the model is used as backbone.
reshape_hidden_states (`bool`, *optional*, defaults to `True`):
    Whether to reshape the feature maps to 4D tensors of shape `(batch_size, hidden_size, height, width)` in
    case the model is used as backbone. If `False`, the feature maps will be 3D tensors of shape `(batch_size,
    seq_len, hidden_size)`.
n_cls_tokens (`int`, *optional*, defaults to 8):
    Number of class tokens in the Transformer encoder.

Example:

```python
>>> from transformers import PixioConfig, PixioModel

>>> # Initializing a Pixio pixio-huge style configuration
>>> configuration = PixioConfig()

>>> # Initializing a model (with random weights) from the pixio-huge style configuration
>>> model = PixioModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```pixioi   hidden_size    num_hidden_layers   num_attention_heads   n_cls_tokens   
image_size
patch_size N)__name__
__module____qualname____firstlineno____doc__
model_typer    int__annotations__r"   r$   r&   r(   listtupler)   AttributeErrorlayerscale_valueuse_swiglu_ffnuse_mask_token__static_attributes__r*       a/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/pixio/modular_pixio.pyr   r   !   s    2 JKs!!L#47Jd3i%S/1746Jd3i%S/16%'#%N#%Nr:   r   c                       \ rS rSrSrg)PixioPatchEmbeddingsK   r*   Nr+   r,   r-   r.   r9   r*   r:   r;   r=   r=   K       r:   r=   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\	S	\	S\R                  4S
 jr
S\R                  S\R                  4S jrSrU =r$ )PixioEmbeddingsO   z:
Construct the CLS tokens, position and patch embeddings.
configreturnNc                 .  > [         TU ]  5         [        R                  " [        R
                  " SUR                  UR                  5      5      U l        S U l	        [        U5      U l        U R                  R                  n[        R                  " [        R
                  " SX!R                  -   UR                  5      5      U l        [        R                  " UR                  5      U l        UR                  U l        UR"                  U l        Xl        g )N   )super__init__r   	Parametertorchrandnr&   r    	cls_token
mask_tokenr=   patch_embeddingsnum_patchesposition_embeddingsDropouthidden_dropout_probdropoutr)   rD   )selfrD   rP   	__class__s      r;   rI   PixioEmbeddings.__init__T   s    ekk!V5H5H&J\J\&]^ 4V <++77#%<<A{M`M`?`bhbtbt0u#v zz&"<"<="// ++r:   
embeddingsheightwidthc                 0   UR                   S   U R                  -
  nU R                  R                   S   U R                  -
  n[        5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SU R                  24   nU R                  SS2U R                  S24   nUR                   S   nX R                  -  n	X0R                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      nUR                  n[        R                  R                  UR                  [        R                  5      X4SS	S
9R                  US9nUR                  SSSS5      R                  SSU5      n[        R                   " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher
resolution images. This method is also adapted to support tracing and interpolation at torch.float32 precision.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
rG   Ng      ?r   r   r   bicubicF)sizemodealign_cornersdtypedim)shaper&   rQ   r   r)   r1   reshapepermuterb   r   
functionalinterpolatetorK   float32viewcat)rU   rX   rY   rZ   rP   num_positionsclass_pos_embedpatch_pos_embedrd   
new_height	new_widthsqrt_num_positionstarget_dtypes                r;   interpolate_pos_encoding(PixioEmbeddings.interpolate_pos_encodinga   s    !&&q)D,=,==0066q9D<M<MM|| <+++2216I8I8I6I3IJ221d6G6G6I3IJr".
__,	 !34)11!5G]`a)11!Q1=&,,--33u}}-(	 4 

 "<"
  	 *11!Q1=BB1b#Nyy/;CCr:   pixel_valuesc                 d   UR                   u  p#pEU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      n[        R                  " X4SS9nXpR                  XtU5      -   nU R                  U5      nU$ )Nra   r\   rG   rc   )re   rO   
projectionweightrb   rj   rM   expandrK   rm   ru   rT   )	rU   rw   
batch_size_rY   rZ   rt   rX   
cls_tokenss	            r;   forwardPixioEmbeddings.forward   s    '3'9'9$
v,,77>>DD**<???+NO
^^**:r2>
YY
7Q?
"?"?
TY"ZZ
\\*-
r:   )rM   rD   rT   rN   r&   rO   r)   rQ   )r+   r,   r-   r.   r/   r   rI   rK   Tensorr1   ru   r   r9   __classcell__rV   s   @r;   rB   rB   O   sn    { t $D5<< $D $DUX $D]b]i]i $DLELL U\\  r:   rB   c                       \ rS rSrSrg)PixioAttention   r*   Nr?   r*   r:   r;   r   r      r@   r:   r   c                       \ rS rSrSrg)PixioMLP   r*   Nr?   r*   r:   r;   r   r      r@   r:   r   c                       \ rS rSrSrg)PixioDropPath   r*   Nr?   r*   r:   r;   r   r      r@   r:   r   c            	          ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S-  S\\	   S\R                  4S	 jjr
S
rU =r$ )
PixioLayer   rD   c                    > [         TU ]  U5        UR                  S:  a  [        UR                  5      U l        g [        R
                  " 5       U l        g )Ng        )rH   rI   drop_path_rater   r   Identity	drop_path)rU   rD   rV   s     r;   rI   PixioLayer.__init__   s@     AGAVAVY\A\v'<'<=bdbmbmbor:   Nhidden_statesattention_maskkwargsrE   c                 2   UnU R                  U5      nU R                  " X40 UD6u  pU R                  U5      nU R                  U5      U-   nUnU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      U-   nU$ N)layernorm_before	attentionrT   r   layernorm_aftermlp)rU   r   r   r   residualr}   s         r;   r   PixioLayer.forward   s     !--m<>>-R6R]3}5@ ,,];/]3}5@r:   )r   r   )r+   r,   r-   r.   r   rI   rK   r   r   r   r   r9   r   r   s   @r;   r   r      s]    p{ p /3|| t+ +,	
 
 r:   r   c                       \ rS rSrSrg)PixioPreTrainedModel   r*   Nr?   r*   r:   r;   r   r      r@   r:   r   c                      ^  \ rS rSrS\4U 4S jjr\\" SS9\  SS\	R                  S-  S\	R                  S-  S	\\   S
\4S jj5       5       5       rSrU =r$ )
PixioModel   rD   c                 f  > [         TU ]  U5        Xl        [        U5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l
        [
        R                  " UR                  UR                  S9U l        U R                  5         g s  snf )Neps)rH   rI   rD   rB   rX   r   
ModuleListranger"   r   layers	LayerNormr    layer_norm_eps	layernorm	post_initrU   rD   r}   rV   s      r;   rI   PixioModel.__init__   s     )&1mmvG_G_A`$aA`AZ%7A`$abf&8&8f>S>ST	 %bs   B.F)tie_last_hidden_statesNrw   r   r   rE   c                 :   Uc  [        S5      eU R                  U5      n[        U R                  UUS9nUnU R                   H  nU" XR40 UD6nM     U R                  U5      nUS S 2S U R                  R                  2S S 24   R                  SS9n[        UUS9$ )Nz You have to specify pixel_values)rD   inputs_embedsr   rG   rc   )last_hidden_statepooler_output)	
ValueErrorrX   r   rD   r   r   r&   meanr   )rU   rw   r   r   embedding_outputr   layerpooled_outputs           r;   r   PixioModel.forward   s     ?@@??<82;;*)

 )[[E!-J6JM !}5%a)G4??+G+G)G&JKPPUVPW)+'
 	
r:   )rD   rX   r   r   )NN)r+   r,   r-   r.   r   rI   r   r   r   rK   r   r   r   r   r   r9   r   r   s   @r;   r   r      s~    	{ 	  E2 -1.2
llT)
 t+
 +,	

 
$
  3  
r:   r   zN
    Pixio backbone, to be used with frameworks like DETR and MaskFormer.
    )custom_introc                      ^  \ rS rSrS\4U 4S jjr\\\ SS\	R                  S\	R                  S-  S\\   S\4S	 jj5       5       5       rS
rU =r$ )PixioBackbone   rD   c                 8  > [         TU ]  U5        [        UR                  S-   5       Vs/ s H  o!R                  PM     snU l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U R                  5         g s  snf )NrG   r   )rH   rI   r   r"   r    num_featuresr   r   r   r   r   r   r   r   s      r;   rI   PixioBackbone.__init__   sx     9>v?W?WZ[?[9\]9\A//9\]'
f&8&8f>S>ST	 ^s   BNrw   r   r   rE   c                    SUS'   U R                   " X40 UD6nUR                  n/ n[        U R                  U5       H  u  pxXpR                  ;   d  M  U R
                  R                  (       a  U R                  U5      nU R
                  R                  (       a  USS2U R                   R                  R                  S24   nUR                  u  ppU R
                  R                  nUR                  XU-  X-  S5      nUR                  SSSS5      R                  5       nUR!                  U5        M     [#        [%        U5      UR                  UR&                  S	9$ )
a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> processor = AutoImageProcessor.from_pretrained("facebook/pixio-huge")
>>> model = AutoBackbone.from_pretrained(
...     "facebook/pixio-huge", out_features=["stage7", "stage15", "stage23", "stage31"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)
>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 1280, 16, 16]
```Toutput_hidden_statesNr\   r   r   rG   r   )feature_mapsr   
attentions)r   r   zipstage_namesout_featuresrD   apply_layernormr   reshape_hidden_statesrX   r&   re   r)   rf   rg   
contiguousappendr	   r4   r   )rU   rw   r   r   outputr   r   stagehidden_stater|   r}   rY   rZ   r)   s                 r;   r   PixioBackbone.forward   s7   F *.%&"&**\"TV"T,,#&t'7'7#GE)));;..#'>>,#?L;;44#/4::3H3H3U3U3W0W#XL3?3E3E0J6!%!7!7J#/#7#7
jDXZ_Zmoq#rL#/#7#71a#C#N#N#PL##L1 $H |, ..((
 	
r:   )r   r   r   r   )r+   r,   r-   r.   r   rI   r   r   r   rK   r   r   r   r	   r   r9   r   r   s   @r;   r   r      sq    {    /36
ll6
 t+6
 +,	6

 
6
  ! 6
r:   r   )r   r   r   r   )0r/   rK   huggingface_hub.dataclassesr   r   backbone_utilsr   r   masking_utilsr   modeling_outputsr	   r
   r   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   dinov2.configuration_dinov2r   dinov2.modeling_dinov2r   swin.modeling_swinr   vit.modeling_vitr   r   r   r   r   r=   ModulerB   r   r   r   r   r   r   r   __all__r*   r:   r;   <module>r      s     .  H 6 [ [ & C C I 5 6 . - ] ] 01%&, %&  2%&P	- 	Dbii DN	\ 		y 		L 	 4	- 	 '
% '
 '
T 
C
M#7 C

C
L Qr:   