
    
3jR                    &   S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJs  Jr  SSKJrJrJrJr  SSKJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.  SSK/J0r0  SS	K1J2r2  SS
K3J4r4  SSK5J6r6J7r7  S/r8 " S S\Rr                  5      r: " S S\Rr                  5      r; " S S\Rr                  5      r< " S S\Rr                  5      r= SS\\>\R~                  4   S\Rr                  S\>S\\>\R~                  4   4S jjr@  SS\\>\R~                  4   S\Rr                  S\>S\AS\\>\R~                  4   4
S jjrBSS\>S\AS\=4S jjrCSS\>S\\>\4   4S  jjrDSS\>S\\>\4   4S! jjrESS\>S\\>\4   4S" jjrF\6" 0 S#\D" S$S%9_S&\D" S$S'S(S)S*9_S+\D" S$\\S'S(S)S,9_S-\D" S$\\S.S(S)S,9_S/\D" S$S0S(S)S*9_S1\D" S$S0S(S)S*9_S2\D" S$S0S(S)S*9_S3\D" S$S'S(S49_S5\D" S$S'S(S49_S6\D" S$S0S(S49_S7\D" S$S0S(S49_S8\D" S$S0S(S49_S9\D" S$S0S(S)S:S;9_S<\D" S$S0S(S)S:S;9_S=\D" S$S0S(S)S:S;9_S>\D" S$SS?9_S@\D" S$SS?9_0 SA\D" S$SS?9_SB\D" S$SS?9_SC\D" S$SS?9_SD\D" S$SES?9_SF\D" S$SES?9_SG\D" S$SHS?9_SI\D" S$SJS?9_SK\D" S$S'S(SJSL9_SM\D" S$SES?9_SN\D" S$SES?9_SO\D" SSP9_SQ\D" S$SRSSSTSTSU9_SV\D" S$SRSSSTSTSU9_SW\D" S$SRSSS49_SX\D" S$SRSSSTSTSU9_SY\E" S$SZSHS[9_S\\E" S$SZSHS[9_E0 S]\E" S$S^SES[9_S_\E" S$S'SES[9_S`\E" S$S0SaS[9_Sb\E" S$S0SS[9_Sc\E" S$S0SS[9_Sd\E" S$S0SS[9_Se\E" S$S0SS[9_Sf\E" S$SgSS[9_Sh\E" S$SgSS[9_Si\E" S$SgSS[9_Sj\E" S$S0SS[9_Sk\E" S$S0SS[9_Sl\D" S$\\SmSn9_So\D" S$\\SmSn9_Sp\D" S$\\SmSn9_Sq\D" S$\\SmSn9_Sr\D" S$\\SmSn9_E0 Ss\D" S$\\SmSn9_St\D" S$\\SmSn9_Su\D" S$\\SmSn9_Sv\D" S$\\SmSn9_Sw\D" S$\\SmSn9_Sx\D" S$\\SmSn9_Sy\D" S$\\SmSn9_Sz\F" S$S%9_S{\F" S$S%9_S|\F" S$S%9_S}\F" S$S%9_S~\F" S$S%9_S\F" S$S%9_S\F" S$S%9_S\F" S$S%9_S\F" S$SSS9_S\F" S$SSS9_E\F" S$S%9\F" S$S%9\F" S$S%9\F" S$SSS9S.E5      rG\7SS\AS\=4S jj5       rH\7SS\AS\=4S jj5       rI\7SS\AS\=4S jj5       rJ\7SS\AS\=4S jj5       rK\7SS\AS\=4S jj5       rL\7SS\AS\=4S jj5       rM\7SS\AS\=4S jj5       rN\7SS\AS\=4S jj5       rO\7SS\AS\=4S jj5       rP\7SS\AS\=4S jj5       rQ\7SS\AS\=4S jj5       rR\7SS\AS\=4S jj5       rS\7SS\AS\=4S jj5       rT\7SS\AS\=4S jj5       rU\7SS\AS\=4S jj5       rV\7SS\AS\=4S jj5       rW\7SS\AS\=4S jj5       rX\7SS\AS\=4S jj5       rY\7SS\AS\=4S jj5       rZ\7SS\AS\=4S jj5       r[\7SS\AS\=4S jj5       r\\7SS\AS\=4S jj5       r]\7SS\AS\=4S jj5       r^\7SS\AS\=4S jj5       r_\7SS\AS\=4S jj5       r`\7SS\AS\=4S jj5       ra\7SS\AS\=4S jj5       rb\7SS\AS\=4S jj5       rc\7SS\AS\=4S jj5       rd\7SS\AS\=4S jj5       re\7SS\AS\=4S jj5       rf\7SS\AS\=4S jj5       rg\7SS\AS\=4S jj5       rh\7SS\AS\=4S jj5       ri\7SS\AS\=4S jj5       rj\7SS\AS\=4S jj5       rk\7SS\AS\=4S jj5       rl\7SS\AS\=4S jj5       rm\7SS\AS\=4S jj5       rn\7SS\AS\=4S jj5       ro\7SS\AS\=4S jj5       rp\7SS\AS\=4S jj5       rq\7SS\AS\=4S jj5       rr\7SS\AS\=4S jj5       rs\7SS\AS\=4S jj5       rt\7SS\AS\=4S jj5       ru\7SS\AS\=4S jj5       rv\7SS\AS\=4S jj5       rw\7SS\AS\=4S jj5       rx\7SS\AS\=4S jj5       ry\7SS\AS\=4S jj5       rz\7SS\AS\=4S jj5       r{\7SS\AS\=4S jj5       r|\7SS\AS\=4S jj5       r}\7SS\AS\=4S jj5       r~g)aJ  EVA

EVA ViT from https://github.com/baaivision/EVA , paper: https://arxiv.org/abs/2211.07636

This file contains a number of ViT variants the utilise ROPE position embeddings, SwiGLU and other additions:
 * EVA & EVA02 model implementations that evolved from BEiT, additional models in vision_transformer.py.
 * `timm` original SBB ViT w/ ROPE position embeddings
 * Perception Encoder (PE) ViT from Meta (https://arxiv.org/abs/2504.13181)
 * ROPE-ViT from Naver AI (https://arxiv.org/abs/2403.13298)
 * DINOv3 from META AI Research (https://arxiv.org/abs/2508.10104)

@article{EVA,
  title={EVA: Exploring the Limits of Masked Visual Representation Learning at Scale},
  author={Fang, Yuxin and Wang, Wen and Xie, Binhui and Sun, Quan and Wu, Ledell and Wang, Xinggang and Huang,
  Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2211.07636},
  year={2022}
}

EVA-02: A Visual Representation for Neon Genesis - https://arxiv.org/abs/2303.11331
@article{EVA02,
  title={EVA-02: A Visual Representation for Neon Genesis},
  author={Fang, Yuxin and Sun, Quan and Wang, Xinggang and Huang, Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2303.11331},
  year={2023}
}

@article{bolya2025perception,
  title={Perception encoder: The best visual embeddings are not at the output of the network},
  author={Bolya, Daniel and Huang, Po-Yao and Sun, Peize and Cho, Jang Hyun and Madotto, Andrea and Wei, Chen and Ma,
    Tengyu and Zhi, Jiale and Rajasegaran, Jathushan and Rasheed, Hanoona and others},
  journal={arXiv preprint arXiv:2504.13181},
  year={2025}
}

@inproceedings{heo2024rotary,
  title={Rotary position embedding for vision transformer},
  author={Heo, Byeongho and Park, Song and Han, Dongyoon and Yun, Sangdoo},
  booktitle={European Conference on Computer Vision},
  pages={289--305},
  year={2024},
  organization={Springer}
}

@article{simeoni2025dinov3,
  title={{DINOv3}},
  author={Sim{'e}oni, Oriane and Vo, Huy V. and Seitzer, Maximilian and Baldassarre, Federico and Oquab, Maxime
    and Jose, Cijo and Khalidov, Vasil and Szafraniec, Marc and Yi, Seungeun and Ramamonjisoa, Micha{"e}l
    and Massa, Francisco and Haziza, Daniel and Wehrstedt, Luca and Wang, Jianyuan and Darcet, Timoth{'e}e
    and Moutakanni, Th{'e}o and Sentana, Leonel and Roberts, Claire and Vedaldi, Andrea and Tolan, Jamie
    and Brandt, John and Couprie, Camille and Mairal, Julien and J{'e}gou, Herv{'e} and Labatut, Patrick
    and Bojanowski, Piotr},
  year={2025},
  eprint={2508.10104},
  url={https://arxiv.org/abs/2508.10104},
}

DINOv3 code was a modification of existing EVA model and support modules, so licensed under Apache-2.0 like timm.
Weights from META remain under DINOv3 License (https://ai.meta.com/resources/models-and-libraries/dinov3-license/).

Modifications by / Copyright 2023 Ross Wightman, original copyrights below
    N)partial)AnyCallableDictListOptionalSetTupleUnion)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)
PatchEmbedMlpGluMlpSwiGLU	LayerNormDropPathcalculate_drop_path_ratesPatchDropoutWithIndicescreate_rope_embedapply_rot_embed_catapply_keep_indices_nlctrunc_normal_resample_patch_embedresample_abs_pos_embedglobal_pool_nlc	to_2tupleuse_fused_attnmaybe_add_maskresolve_self_attn_maskAttentionRopeAttentionPoolLatent   )build_model_with_cfg)feature_take_indices)
checkpoint)generate_default_cfgsregister_modelEvac                   <  ^  \ rS rSr% Sr\R                  R                  \   \	S'                 SS\
S\
S\S\S\S	\
S
\S\S\\
   S\\   S\S\S\4U 4S jjjrSS jrSS jr   SS\\R"                     S\\R"                     S\4S jjrSS jrSrU =r$ )EvaAttentioni   zFEVA Attention with ROPE, no k-bias, and fused/unfused qkv options
    
fused_attndim	num_headsqkv_bias	qkv_fusedqkv_bias_separatenum_prefix_tokens	attn_drop	proj_dropattn_head_dim
norm_layerqk_norm
scale_normrotate_halfc                   > XS.n[         TU ]  5         U(       d  U(       a
  U
c   S5       eX l        X-  U l        U	b  Xl        U R                  U R                  -  nU R                  S-  U l        X`l        [        5       U l        XPl        Xl	        U(       a  [        R                  " UUS-  4SS0UD6U l        S=U l        =U l        U l        U(       a  [        R                   " ["        R$                  " U40 UD65      U l        U R)                  S["        R$                  " U40 UD6SS	9  [        R                   " ["        R$                  " U40 UD65      U l        OS=U l        =U l        U l        Oy[        R                  " UU4SU0UD6U l        [        R                  " UU4SS0UD6U l        [        R                  " UU4SU0UD6U l        SU l        S=U l        =U l        U l        U(       a  U
" U R                  40 UD6O[        R.                  " 5       U l        U(       a  U
" U R                  40 UD6O[        R.                  " 5       U l        [        R4                  " U5      U l        U(       a	  U
" U40 UD6O[        R.                  " 5       U l        [        R                  " UU40 UD6U l        [        R4                  " U5      U l        U R?                  5         g)
a  
Args:
    dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to add a bias term to the query, key, and value projections
    qkv_fused: Whether qkv projections are fused into one projection or separate
    qkv_bias_separate: Whether to apply bias to qkv as a separate addition or part of F.linear() call
    num_prefix_tokens: Number of reg/cls tokens at the beginning of the sequence that
        should not have position embeddings applied
    attn_drop: Dropout rate for attention weights
    proj_drop: Dropout rate for the output projection
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
    norm_layer: Normalization layer constructor to use for QK and scale normalization
    qk_norm: Enable normalization of query (Q) and key (K) vectors with norm_layer
    scale_norm: Enable normalization (scaling) of attention output with norm_layer
    rotate_half: Use half rotation layout instead of interleaved
devicedtypeNz<norm_layer must be provided if qk_norm or scale_norm is Trueg         biasFk_bias)
persistent) super__init__r1   head_dimscaler5   r    r/   r4   r<   nnLinearqkvq_projk_projv_proj	Parametertorchemptyq_biasregister_bufferv_biasrC   Identityq_normk_normDropoutr6   normprojr7   reset_parameters)selfr0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r?   r@   ddattn_dim	__class__s                     I/home/wildlama/miniconda3/lib/python3.13/site-packages/timm/models/eva.pyrF   EvaAttention.__init__n   sF   F /)i+ii)"($)M==4>>1]]d*
!2(*!2&yyhlEE"EDH6::DK:$+ ll5;;x+F2+FG$$Xu{{8/Jr/JW\$] ll5;;x+F2+FG:>>>dkDK))CGGBGDK))CDDDDK))CGGBGDKDH6::DK:$+9@j5"5bkkm9@j5"5bkkmI.2<Jx.2."++-	IIh2r2	I. 	    c                     U R                   bR  [        R                  R                  U R                   5        [        R                  R                  U R                  5        U R                  5         g)z"Initialize parameters and buffers.N)rR   rI   initzeros_rT   _init_buffersr\   s    r`   r[   EvaAttention.reset_parameters   s@    ;;"GGNN4;;'GGNN4;;'rb   c                 T    U R                   b  U R                   R                  5         gg)z.Compute and fill non-persistent buffer values.N)rC   zero_rg   s    r`   rf   EvaAttention._init_buffers   s!    ;;"KK #rb   rope	attn_mask	is_causalc                 ,   UR                   u  pVnU R                  b  U R                  c  U R                  U5      nO[        R                  " U R                  U R
                  U R                  45      n	U R                  (       a  U R                  U5      nX-  nO)[        R                  " XR                  R                  U	S9nUR                  XVSU R                  S5      R                  SSSSS5      nUR                  S5      u  pnOU R                  U5      R                  XVU R                  S5      R!                  SS5      n
U R#                  U5      R                  XVU R                  S5      R!                  SS5      nU R%                  U5      R                  XVU R                  S5      R!                  SS5      nU R'                  U
5      U R)                  U5      pUb  U R*                  n[-        U S	S
5      n[        R                  " U
SS2SS2SU2SS24   [/        U
SS2SS2US2SS24   X.S9/SS9R1                  U5      n
[        R                  " USS2SS2SU2SS24   [/        USS2SS2US2SS24   X.S9/SS9R1                  U5      nU R2                  (       a@  [        R4                  " XUUU R6                  (       a  U R8                  R:                  OSUS9nO\XR<                  -  n
XR!                  SS5      -  n[?        XoX4S9n[A        UU5      nURC                  SS9nU R9                  U5      nX-  nUR!                  SS5      R                  XVU5      nU RE                  U5      nU RG                  U5      nU RI                  U5      nU$ )a  Forward pass for the attention module.

Args:
    x: Input tensor of shape (batch_size, sequence_length, embedding_dim)
    rope: Rotary position embeddings tensor for position-aware attention
    attn_mask: Optional attention mask to apply during attention computation
    is_causal: If True, use causal (autoregressive) masking

Returns:
    Tensor of shape (batch_size, sequence_length, embedding_dim)
N)weightrB   rA      r   r%      r<   F)halfr0           )rm   	dropout_prn   )rn   )%shaperK   rR   rP   catrC   rT   r4   Flinearrp   reshaper1   permuteunbindrL   	transposerM   rN   rV   rW   r5   getattrr   type_asr/   scaled_dot_product_attentiontrainingr6   prH   r"   r!   softmaxrY   rZ   r7   )r\   xrl   rm   rn   BNCrK   r2   qkvnptrt   attn	attn_biass                    r`   forwardEvaAttention.forward   s   $ ''a88{{"hhqk 99dkk4;;%LM))((1+COC((1XX__8LC++aAt~~r:BB1aAqQCjjmGA!A&&qT^^R@JJ1aPAA&&qT^^R@JJ1aPAA&&qT^^R@JJ1aPA{{1~t{{1~1((C46D		1Q4C4]+-@1aq=AQSW-cdjklttuvwA		1Q4C4]+-@1aq=AQSW-cdjklttuvwA??..a#.2mm$..**#	A JJA{{2r**D.q	WI!$	2D<<B<'D>>$'DAKK1%%aA.IIaLIIaLNN1rb   c                 $    U R                  5         g)z"Initialize non-persistent buffers.N)rf   rg   s    r`   init_non_persistent_buffers(EvaAttention.init_non_persistent_buffers  s    rb   )r6   r/   rG   rC   rW   rM   rY   r1   r5   rZ   r7   rR   rV   rL   rK   r4   r<   rH   rT   rN   )   TTFr%   rv   rv   NNFTFNNreturnNNNF)__name__
__module____qualname____firstlineno____doc__rP   jitFinalbool__annotations__intfloatr   r   rF   r[   rf   Tensorr   r   __static_attributes____classcell__r_   s   @r`   r-   r-   i   sC   		%%
 !"&+%&!!+/-1!# %!I I  I  	I 
 I   $I   #I  I  I  $C=I  !*I  I  I  I  I V  ,004#A 5<<(A  -	A
 AF rb   r-   c            (       P  ^  \ rS rSrSSSSSSSSSSSSSS	\R
                  \S	S	S	4S
\S\S\S\S\	S\S\S\S\S\S\
S\S\	S\	S\	S\\	   S\S\S\\   4&U 4S jjjrS&S jr   S'S \R                   S!\\R                      S"\\R                      S#\S\R                   4
S$ jjrS%rU =r$ )(EvaBlocki  T      @Fr   r%   evarv   Nr0   r1   r2   r3   	mlp_ratio
swiglu_mlpswiglu_align_to	scale_mlpscale_attn_innerr5   	attn_typer<   r7   r6   	drop_pathinit_values	act_layerr9   r8   c                   > UUS.n[         TU ]  5         U" U40 UD6U l        US:X  a  [        O[        nU" U4UUUU
UUUUU	US.
UD6U l        UU l        Ub+  [        R                  " [        R                  " U40 UD65      OSU l        US:  a  [        U5      O[        R                  " 5       U l        U" U40 UD6U l        [!        X-  5      nU(       a`  U(       d  U(       a   [#        SUUU(       a  UOSUUS.UD6U l        OQ['        SUUS-  U(       a  UOS[        R(                  SUS	.UD6U l        O[+        SUUUU(       a  UOSUS
.UD6U l        Ub+  [        R                  " [        R                  " U40 UD65      OSU l        US:  a  [        U5      O[        R                  " 5       U l        U R1                  5         g)a>  Initialize the EVA transformer block.

Args:
  dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to use bias terms in query, key, value projections
    qkv_fused: Whether to use a single projection for query, key, value
    mlp_ratio: Ratio of MLP hidden dimension to input dimension
    swiglu_mlp: Whether to use SwiGLU activation in the MLP
    scale_mlp: Whether to use normalization in the MLP
    scale_attn_inner: Whether to use normalization within the attention mechanism
    num_prefix_tokens: Number of tokens at the beginning of the sequence (class tokens, etc.)
    attn_type: Type of attention module to use ('eva' or 'rope')
    proj_drop: Dropout rate for projection layers
    attn_drop: Dropout rate for attention matrix
    drop_path: Stochastic depth rate
    init_values: Initial value for LayerScale, None = no LayerScale
    act_layer: Activation layer constructor
    norm_layer: Normalization layer constructor
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
r>   rl   
r1   r2   r3   r5   r6   r7   r8   r9   r;   r<   Nrv   in_featureshidden_featuresr9   dropalign_torr   Fr   r   r9   r   	gate_lastr   r   r   r   r9   r    )rE   rF   norm1r#   r-   r   r   rI   rO   rP   rQ   gamma_1r   rU   
drop_path1norm2r   r   mlpr   SiLUr   gamma_2
drop_path2r[   )r\   r0   r1   r2   r3   r   r   r   r   r   r5   r   r<   r7   r6   r   r   r   r9   r8   r?   r@   kwargsr]   attn_clsr   r_   s                             r`   rF   EvaBlock.__init__  s   \ /*r*
$-$7=\
/'!'#
 
	 '?J?Vr||EKK$:r$:;\`1:R(9-R[[]*r*
co.O!  #$3-6zD",  "  #$3a$7-6zD gg#"    /#)2: DH @K?Vr||EKK$:r$:;\`1:R(9-R[[] 	rb   r   c                     U R                   bi  [        R                  R                  U R                   U R                  5        [        R                  R                  U R
                  U R                  5        gg)zInitialize parameters.N)r   rI   rd   	constant_r   r   rg   s    r`   r[   EvaBlock.reset_parameters{  sM    <<#GGdllD,<,<=GGdllD,<,<= $rb   r   rl   rm   rn   c                    U R                   cd  XR                  U R                  U R                  U5      X#US95      -   nXR	                  U R                  U R                  U5      5      5      -   nU$ XR                  U R                   U R                  U R                  U5      X#US9-  5      -   nXR	                  U R                  U R                  U R                  U5      5      -  5      -   nU$ Nrl   rm   rn   )r   r   r   r   r   r   r   r   r\   r   rl   rm   rn   s        r`   r   EvaBlock.forward  s     <<OODIIdjjm$gpI$qrrAOODHHTZZ]$;<<A  OODLL499TZZ]QUv9  4A  %A  B  BAOODLL488DJJqM3J$JKKArb   )	r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   rI   GELUr   r   r   r   strr   r   rF   r[   rP   r   r   r   r   r   s   @r`   r   r     s    ""!$#$#%*%&" %!!!+/"$''#,+/-j j  j  	j 
 j  j  j  !j  j  #j   #j  j  j  j  j   !j " "%#j $  %j & !'j ( $C=)j  j X> ,004#|| 5<<(  -	
  
 rb   r   c            (       ^  ^  \ rS rSrSrSSSSSSSSSSS	S	S	S
\R                  \R                  S
S
S
4S\S\S\	S\	S\
S\S\	S\	S\S\	S\	S\S\
S\
S\
S\\
   S\S\S\\   4&U 4S jjjr   S&S\R                   S \\R                      S!\\R                      S"\	S#\R                   4
S$ jjrS%rU =r$ )'EvaBlockPostNormi  zEEVA block w/ post-norm and support for swiglu, MLP norm scale, ROPE. Tr   r   Fr   r%   rv   Nr0   r1   r2   r3   r   r   r<   r   r   r   r   r5   r7   r6   r   r   r   r9   r8   c                   > UUS.n[         TU ]  5         US:X  a  [        O[        nU" U4UUUUUUUUUUS.
UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l	        [        X-  5      nU(       aY  U
(       a   [        SUUU
(       a  UOSUU	S.UD6U l        OQ[        SUUS-  U
(       a  UOS[        R                  SUS	.UD6U l        O[        SUUUU
(       a  UOSUS
.UD6U l        U" U40 UD6U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)al  Initialize the post-norm EVA transformer block.

Args:
  dim: Input dimension of the token embeddings
    num_heads: Number of attention heads
    qkv_bias: Whether to use bias terms in query, key, value projections
    qkv_fused: Whether to use a single projection for query, key, value
    mlp_ratio: Ratio of MLP hidden dimension to input dimension
    swiglu_mlp: Whether to use SwiGLU activation in the MLP
    scale_mlp: Whether to use normalization in the MLP
    scale_attn_inner: Whether to use normalization within the attention mechanism
    num_prefix_tokens: Number of tokens at the beginning of the sequence (class tokens, etc.)
    attn_type: Type of attention module to use ('eva' or 'rope')
    proj_drop: Dropout rate for projection layers
    attn_drop: Dropout rate for attention matrix
    drop_path: Stochastic depth rate
    init_values: Initial value for LayerScale, None = no LayerScale (NOTE: ignored for post-norm block)
    act_layer: Activation layer constructor
    norm_layer: Normalization layer constructor
    attn_head_dim: Dimension of each attention head (if None, computed as dim // num_heads)
r>   rl   r   rv   Nr   rr   Fr   r   r   )rE   rF   r#   r-   r   r   r   rI   rU   r   r   r   r   r   r   r   r   r   )r\   r0   r1   r2   r3   r   r   r<   r   r   r   r   r5   r7   r6   r   r   r   r9   r8   r?   r@   r   r]   r   r   r_   s                             r`   rF   EvaBlockPostNorm.__init__  so   \ /$-$7=\
/'!'#
 
	  *r*
1:R(9-R[[]co.!  #$3-6zD",  "  #$3a$7-6zD gg#"    /#)2: DH  *r*
1:R(9-R[[]rb   r   rl   rm   rn   r   c                     XR                  U R                  U R                  XX4S95      5      -   nXR                  U R	                  U R                  U5      5      5      -   nU$ r   )r   r   r   r   r   r   r   s        r`   r   EvaBlockPostNorm.forward  sR     

499QY9+l mnn

488A; 788rb   )r   r   r   r   r   r   r   )r   r   r   r   r   rI   r   r   r   r   r   r   r   r   rF   rP   r   r   r   r   r   s   @r`   r   r     s   P
 ""!" %$#$#%*%&!!!+/"$''#%<<+/-dSdS dS 	dS
 dS dS dS dS dS !dS dS #dS  #dS dS dS  !dS" "%#dS$  %dS& !'dS( $C=)dS dSR ,004#	||	 5<<(	  -		
 	 
	 	rb   r   c            [       0  ^  \ rS rSrSrSSSSSSS	S	S
S
SSSSSSSSSSSS\SS
SSS
SSSSSSSSSSSSSSSSSS4.S\\\\\4   4   S\\\\\4   4   S\S\S\	S\S\S\S\
S\
S\S \
S!\S"\
S#\
S$\	S%\S&\S'\S(\S)\S*\S+\S,\\   S-\
S.\S/\
S0\
S1\
S2\\	   S3\S4\	S5\S6\
S7\
S8\
S9\\
   S:\\
   S;\\   S<\\   S=\
S>\
S?\\\\\4   \4      S@\4XU 4SA jjjrSeSB\
4SC jjrSfSE jrSeSF\R$                  SB\
SDS4SG jjr\R*                  R,                  SD\\	   4SH j5       r\R*                  R,                  SeSI\
SDS4SJ jj5       r\R*                  R,                  SgSK\
SD\\	\4   4SL jj5       r\R*                  R,                  SD\R$                  4SM j5       rShS\S\\	   SDS4SN jjr  SiS\\\\4      S\\\\4      SDS4SO jjrSD\\R@                  \\R@                     4   4SP jr!        SjSQ\R@                  SR\\\\"\   4      SS\
ST\
SU\
SV\	SW\
SX\\R@                     SY\
SD\\"\R@                     \\R@                  \"\R@                     4   4   4SZ jjr#   SkSR\\\"\   4   S[\
S\\
4S] jjr$ShSQ\R@                  S^\\	   SD\R@                  4S_ jjr%  SlSQ\R@                  SX\\R@                     SY\
SD\R@                  4S` jjr&SgSQ\R@                  Sa\
SD\R@                  4Sb jjr'  SlSQ\R@                  SX\\R@                     SY\
SD\R@                  4Sc jjr(Sdr)U =r*$ )mr+   i  a  Eva Vision Transformer w/ Abs & Rotary Pos Embed

This class implements the EVA and EVA02 models that were based on the BEiT ViT variant
  * EVA - abs pos embed, global avg pool
  * EVA02 - abs + rope pos embed, global avg pool, SwiGLU, scale Norm in MLP (ala normformer)
      rA     avg      Tr   Fr   r   rv   Nrz   ijg     @gMbP?img_size
patch_sizein_chansnum_classesglobal_pool	embed_dimdepthr1   r2   r3   r   r   r   r   r   r   	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater9   r   class_tokennum_reg_tokensno_embed_classuse_abs_pos_embuse_rot_pos_emb	rope_typerope_grid_offsetrope_grid_indexingrope_temperaturerope_rotate_halfuse_post_normuse_pre_transformer_normuse_post_transformer_normuse_fc_normattn_pool_num_headsattn_pool_mlp_ratiodynamic_img_sizedynamic_img_padref_feat_shapehead_init_scalec/                 	  > [         T;U ]  5         U-U.S.n/US;   d   eX@l        X0l        XPl        U=U l        =U l        U l        U(       a  SOSU-   U l        UU l	        U)U l
        SU l        U$n0U&b  U&n1OUS:H  n1U%b  U%n2OU1(       + n20 n3U)(       a  U3R                  [        SSS	95        [        S+UUUUU*U$(       + S
.U3DU/D6U l        U R                  R                   n4[#        U R                  S5      (       a  U R                  R%                  5       OUn5U(       a-  [&        R(                  " [*        R,                  " SSU40 U/D65      OSU l        U(       a-  [&        R(                  " [*        R,                  " SUU40 U/D65      OSU l        U=(       a    U R0                  SL U l        U(       a  U4OU4U R                  -   n6U(       a-  [&        R(                  " [*        R,                  " SU6U40 U/D65      OSU l        [&        R6                  " US9U l        US:  a  [;        UU R                  S9U l        OSU l        SU l        U(       a  U+b  [A        U+5      OSn+[        S+UUU)(       a  SOU R                  RB                  U!U S.U/D6n7US:X  a   U7R                  [        US95        SU l        O US:X  a  U7R                  [        SUU+S95        [E        S+SU0U7D6U l#        OSU l#        U0(       a	  U" U40 U/D6O[&        RH                  " 5       U l%        [M        UU5      n8U#(       a  [N        O[P        n9[&        RR                  " [U        U5       V:s/ s HK  n:U9" S+0 SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU"_S U R                  _S!U_S"U_S#U8U:   _S$U_S%U_U/D6PMM     sn:5      U l+        [U        U5       V:s/ s H  n:[        S&U: 3UU5S'9PM     sn:U l,        U2(       a	  U" U40 U/D6O[&        RH                  " 5       U l-        US(:X  aA  []        U R                  4U'=(       d    UU(=(       d    UU[&        R^                  S).U/D6U l0        OSU l0        U1(       a	  U" U40 U/D6O[&        RH                  " 5       U l1        [&        R6                  " U5      U l2        US:  a  [&        Rf                  " Xd40 U/D6O[&        RH                  " 5       U l4        U,U l5        U Rm                  SS*9  gs  sn:f s  sn:f ),a  Initialize the EVA Vision Transformer model.

Args:
    img_size: Input image size (single int for square, or tuple for rectangular)
    patch_size: Patch size to divide image into tokens (single int for square, or tuple)
    in_chans: Number of input image channels
    num_classes: Number of classes (output dim) for classification head (final projection), 0 for pass-through
    global_pool: Type of global pooling for final sequence ('avg', 'token', 'map', etc.)
    embed_dim: Embedding dimension for tokens
    depth: Number of transformer blocks
    num_heads: Number of attention heads
    qkv_bias: Enable bias for query, key, value projections
    qkv_fused: Use a single projection for query, key, value
    mlp_ratio: Ratio of mlp hidden dim to embedding dim
    swiglu_mlp: Use SwiGLU activation in MLP
    scale_mlp: Apply scaling normalization in MLP (normformer style)
    scale_attn_inner: Apply scaling normalization inside attention
    attn_type: Type of attention module to use
    drop_rate: Dropout rate after final projection and pooling
    pos_drop_rate: Dropout rate for positional embeddings
    patch_drop_rate: Rate of dropping patches during training
    proj_drop_rate: Dropout rate for projections
    attn_drop_rate: Dropout rate for attention
    drop_path_rate: Stochastic depth rate
    norm_layer: Normalization layer constructor
    init_values: Initial layer-scale values
    class_token: Use class token
    num_reg_tokens: Number of additional learnable 'register' tokens to add to the sequence
    no_embed_class: Don't include position embeddings for class (or reg) tokens
    use_abs_pos_emb: Use absolute (learned) positional embeddings
    use_rot_pos_emb: Use rotary position embeddings
    rope_type: Type of RoPE to use ('cat', 'mixed', 'dinov3', etc.).
    rope_grid_offset: Offset for rotary position embedding grid
    rope_grid_indexing: Indexing mode for rotary position embeddings ('ij' or 'xy')
    rope_temperature: Temperature parameter for ROPE frequency computation
    rope_rotate_half: Use half rotation layout (rotate D/2 dims), else use interleaved rotation layout
    use_post_norm: Use post-norm transformer block type
    use_pre_transformer_norm: Use normalization layer before transformer blocks
    use_post_transformer_norm: Use normalization layer after transformer blocks
    use_fc_norm: Use normalization layer after pooling, before final classifier
    attn_pool_num_heads: Number of heads in attention pooling
    attn_pool_mlp_ratio: MLP ratio in attention pooling
    dynamic_img_size: Support dynamic image sizes in forward pass
    dynamic_img_pad: Apply dynamic padding for irregular image sizes
    ref_feat_shape: Reference feature shape for rotary position embedding scale
    head_init_scale: Initialization scale for classification head weights
r>   ) r   avgmaxmaxtokenmapr%   r   FNr   NHWC)strict_img_size
output_fmt)r   r   r   r   r  rB   
feat_ratio)r   )r5   )r0   r1   
feat_shapetemperaturegrid_indexingmixed)r   Trz   )	in_pixelsgrid_offsetr  r   r0   r1   r2   r3   r   r   r   r   r   r   r<   r5   r7   r6   r   r9   r   blocks.)modulenum_chs	reductionr
  )r1   r   r9   r   needs_resetr   )7rE   rF   r   r   r   num_featureshead_hidden_sizer   r5   r   r  grad_checkpointingupdatedictr   patch_embednum_patcheshasattrr  rI   rO   rP   rQ   	cls_token	reg_token	cls_embed	pos_embedrX   pos_dropr   
patch_drop
rope_mixedr   	grid_sizer   rl   rU   norm_prer   r   r   
ModuleListrangeblocksfeature_inforY   r$   r   	attn_poolfc_norm	head_droprJ   headr  init_weights)<r\   r   r   r   r   r   r   r   r1   r2   r3   r   r   r   r   r   r   r   r   r   r   r   r   r9   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r?   r@   r]   activate_pre_normactivate_fc_normactivate_post_norm
embed_argsr!  rnum_pos_tokensrope_kwargsdprblock_fnir_   s<                                                              r`   rF   Eva.__init__  s   @ 	/JJJJ& &ENNND1DN'2!^!K, 0"' 5"**e3$0!:%5!5
d5VLM% 	
!+--	
 	
 	
 &&22-4T5E5E|-T-TD'')ZdMXekk!Q	&HR&HI^bZhekk!^Y&URT&UVnr$?4)?(6K$J`J`<`Ziekk!^Y&URT&UVos

]3Q5oY]YoYopDO"DO:H:TY~6Z^N  ##349I9I9S9S,0 K G#""4e#45"&e#""4# 0#1$  *MIMMDIDI7H
933bkkm'>'4#(mm* 5\+%#* ")  # " $	
 $ & !0 $ "2 $ - #'"8"8 ) ) a&  &!" (%( "+%# $. QVV[P\^P\1D'!yAFP\^ 4FJy/B/2;;=	%0-:-:%'' DN "DN6Fz)2r2BKKMI.?JQBIIi;;TVT_T_Ta	. 	e,Y%#,^s   
AS 5S%r  c                    U R                  [        U R                  US95        U R                  b  [	        U R                  SS9  U R
                  b  [	        U R
                  SS9  U R                  b  [	        U R                  SS9  U R                  5         U R                  (       a  [        U R                  [        R                  5      (       a  [	        U R                  R                  SS9  [        R                  " 5          U R                  R                  R!                  U R                  5        U R                  R"                  R!                  U R                  5        S S S 5        g g g ! , (       d  f       g = f)Nr  {Gz?std)applyr   _init_weightsr&  r   r#  r$  fix_init_weightr  
isinstancer3  rI   rJ   rp   rP   no_gradmul_rB   )r\   r  s     r`   r4  Eva.init_weights  s    

74--;GH>>%$..c2>>%$..c2>>%$..c2Jtyy"))$D$D$))**4		  %%d&:&:;		##D$8$89 ! %E s   AE..
E<r   c                    [         R                  " 5          [        U R                  5       H  u  p[        R
                  " SUS-   -  5      nUR                  R                  R                  R                  U5        UR                  R                  R                  R                  U5        M     SSS5        g! , (       d  f       g= f)z=Fix initialization weights by rescaling based on layer depth.g       @r%   N)rP   rH  	enumerater.  mathsqrtr   rZ   rp   div_r   fc2)r\   layer_idlayerrH   s       r`   rF  Eva.fix_init_weight  sy    ]]_#,T[[#9		#A"67

&&++E2		$$))%0 $: __s   BB88
Cmc                 8   [        U[        R                  5      (       aL  [        UR                  SS9  UR
                  b*  [        R                  R                  UR
                  5        ggU(       a(  [        US5      (       a  XLa  UR                  5         gggg)zInitialize weights for Linear layers and call reset_parameters on modules.

Args:
    m: Module to initialize.
    needs_reset: Whether to call reset_parameters() on modules.
rA  rB  Nr[   )
rG  rI   rJ   r   rp   rB   rd   re   r"  r[   )r\   rT  r  s      r`   rE  Eva._init_weights  sr     a##!((,vv!qvv& "WQ(:;;  AN;[rb   c                     SS1n[        U SS5      =n(       a7  [        US5      (       a&  XR                  5        Vs1 s H  nSU 3iM
     sn-  $ U$ s  snf )z(Parameters to exclude from weight decay.r&  r#  rl   Nno_weight_decayzrope.)r   r"  rX  )r\   nwdrl   r   s       r`   rX  Eva.no_weight_decay  sc     K(D&$//D/WTCT5U5U/C/C/EF/E!E!+/EFFF
 Gs   Aenablec                     Xl         g)z)Enable or disable gradient checkpointing.N)r  )r\   r[  s     r`   set_grad_checkpointingEva.set_grad_checkpointing  s
     #)rb   coarsec                     [        SSS/S9nU$ )z(Create layer groupings for optimization.z ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr.  )r  )r\   r_  matchers      r`   group_matcherEva.group_matcher"  s!     4-/CD
 rb   c                     U R                   $ N)r3  rg   s    r`   get_classifierEva.get_classifier+  s    yyrb   c                     Xl         Ub  X l        US:  a'  [        R                  " U R                  U5      U l        g[        R
                  " 5       U l        g)zsReset the classifier head.

Args:
    num_classes: Number of output classes.
    global_pool: Global pooling type.
Nr   )r   r   rI   rJ   r   rU   r3  )r\   r   r   s      r`   reset_classifierEva.reset_classifier/  sB     '"*>IAoBIIdnnk:	SUS^S^S`	rb   c           
      &   U R                   R                  nU R                   R                  XS9  U R                  b  U R                  (       a  SOU R
                  nU R                   R                  U-   nXPR                  R                  S   :w  aD  [        R                  " [        U R                  U R                   R                  UUSS95      U l        U R                  b0  U R                  R                  U R                   R                  5        gg)zUpdate the input image resolution and patch size.

Args:
    img_size: New input resolution, if None current resolution is used.
    patch_size: New patch size, if None existing patch size is used.
)r   r   Nr   r%   T)new_sizeold_sizer5   verbose)r   r*  set_input_sizer&  r   r5   r!  ry   rI   rO   r   rl   update_feat_shape)r\   r   r   prev_grid_sizer5   num_new_tokenss         r`   rp  Eva.set_input_size;  s     ))33'''Q>>%%)%8%8d>T>T!--99<MMN!5!5a!88!#.DNN!--77+&7 / " 99 II''(8(8(B(BC !rb   c                    U R                   (       a  UR                  u  p#pEU R                  bK  U R                  R                  n[        U R                  X44UU R                  (       a  SOU R                  S9nOS nUR                  USU5      nU R                  b  U R                  R                  X44S9OS nO5U R                  nU R                  b  U R                  R                  5       OS n/ n	U R                  b9  U	R                  U R                  R                  UR                  S   SS5      5        U R                  b9  U	R                  U R                  R                  UR                  S   SS5      5        U R                  (       a'  Ub  X-   nU	(       a  [        R                   " X/-   SS9nO&U	(       a  [        R                   " X/-   SS9nUb  X-   nU R#                  U5      nU R$                  b]  U R%                  U5      u  pUbG  U
bD  ['        XU
5      n[)        U SS5      (       a  UR+                  SS5      nX4$ UR-                  S5      nX4$ )	Nr   )rm  rn  r5   rq   )ry   r%   ru   r)  F)r  ry   r&  r   r*  r   r   r5   viewrl   	get_embedr#  appendexpandr$  rP   rz   r'  r(  r   r   r   	unsqueeze)r\   r   r   HWr   rr  r&  rot_pos_embedto_catkeep_indicess              r`   
_pos_embedEva._pos_embedX  s     JA!~~)!%!1!1!;!;2NNV++/+>+>aDDZDZ		 !	q"a AAEAVDII//qf/=\`MI59YY5JDII//1PTM>>%MM$..//
BCD>>%MM$..//
BCD$MIIfsl2 IIfsl2$MMM! ??&"ooa0OA(\-E 6q V4u55$1$;$;Aq$AM
  %2$;$;A$>Mrb   r   indicesreturn_prefix_tokensrY   
stop_earlyr  intermediates_onlyrm   rn   c
           	      `   US;   d   S5       eUS:H  n
/ n[        [        U R                  5      U5      u  pUR                  u  pnnU R	                  U5      nU R                  U5      u  nnU R                  U5      n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SUS-    n[        U SS5      (       a  Ub  [        U5       H  u  nnU R                  (       a3  [        R                  R                  5       (       d  [        UUUU   XS9nOU" UUU   XS9nUU;   d  M]  UR                  U(       a  U R                  U5      OU5        M     O[        U5       H  u  nnU R                  (       a0  [        R                  R                  5       (       d  [        UUUXS9nOU" UUXS9nUU;   d  MW  UR                  U(       a  U R                  U5      OU5        M     U R                   (       aJ  U Vs/ s H  nUSS2S	U R                   24   PM     nnU Vs/ s H  nUSS2U R                   S24   PM     nnU
(       ad  U R                  R#                  UU45      u  nnU Vs/ s H7  nUR%                  UUUS
5      R'                  S	SSS5      R)                  5       PM9     nn[        R                  R                  5       (       d  U(       a  [+        [-        UW5      5      nU(       a  U$ U R                  U5      nX4$ s  snf s  snf s  snf )ae  Forward features that returns intermediates.
Args:
    x: Input image tensor
    indices: Take last n blocks if an int, if is a sequence, select by matching indices
    return_prefix_tokens: Return both prefix and spatial intermediate tokens
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
    attn_mask: Optional attention mask for masked attention
    is_causal: If True, use causal (autoregressive) masking in attention
)NCHWNLCz>Output format for EVA-ViT features must be one of NCHW or NLC.r  Nr%   r)  Fr   r   rq   rA   rr   )r'   lenr.  ry   r   r  r+  rP   r   is_scriptingr   rL  r  r(   rx  rY   r5   dynamic_feat_sizer}   r~   
contiguouslistzip)r\   r   r  r  rY   r  r  r  rm   rn   r}   intermediatestake_indices	max_indexr   _heightwidthr}  r.  r>  blkyprefix_tokensr{  r|  s                             r`   forward_intermediatesEva.forward_intermediates  s   0 _,n.nn,&"6s4;;7G"Q  ggfeQ??1-=MM!99!!##:[[F[[)a-0F 4u---2K#F+3**5993I3I3K3K"3a0@IkAAM!$4	_A$!((11E , $F+3**5993I3I3K3K"3hAAMY\A$!((11E , !!ERS]Qq!D$:$:"::;]MSDQRMqQq$"8"8"99:MMR##55vuoFDAq^kl^kYZQYYq!Q3;;Aq!QGRRT^kMlyy%%'',@ ]M!BCM  IIaL TR ms   L!9L&>L+
prune_norm
prune_headc                 ,   [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a3  SU l        [        R                  " 5       U l        U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr%   r   r  )	r'   r  r.  rI   rU   rY   r0  r1  rj  )r\   r  r  r  r  r  s         r`   prune_intermediate_layersEva.prune_intermediate_layers  so     #7s4;;7G"Qkk.9q=1DI!DN;;=DL!!!R(rb   	pool_typec                     U R                   b  U R                  U5      nU$ Uc  U R                  OUn[        XU R                  S9nU$ )N)r  r5   )r0  r   r   r5   )r\   r   r  s      r`   poolEva.pool  sH    >>%q!AH(1(9D$$y	AdF\F\]rb   c           	      J   U R                  U5      nU R                  U5      u  pU R                  U5      n[        U SS5      (       ao  Ubl  [	        U R
                  5       HR  u  pVU R                  (       a2  [        R                  R                  5       (       d  [        XaXE   X#S9nMH  U" XU   X#S9nMT     O[U R
                   HK  nU R                  (       a0  [        R                  R                  5       (       d  [        XaXBUS9nMD  U" XX#S9nMM     U R                  U5      nU$ )zForward pass through feature extraction layers.

Args:
    x: Input tensor.
    attn_mask: Optional attention mask for masked attention
    is_causal: If True, use causal (autoregressive) masking in attention.

Returns:
    Feature tensor.
r)  Fr   )r   r  r+  r   rL  r.  r  rP   r   r  r(   rY   )r\   r   rm   rn   r}  r>  r  s          r`   forward_featuresEva.forward_features  s      Q??1-MM!4u---2K $DKK0**5993I3I3K3K"30@IkAA!$4	_A	 1 {{**5993I3I3K3K"3^ghAAY\A	 # IIaLrb   
pre_logitsc                     U R                  U5      nU R                  U5      nU R                  U5      nU(       a  U$ U R                  U5      $ )zForward pass through classifier head.

Args:
    x: Feature tensor.
    pre_logits: Return pre-logits if True.

Returns:
    Output tensor.
)r  r1  r2  r3  )r\   r   r  s      r`   forward_headEva.forward_head  sA     IIaLLLONN1q0DIIaL0rb   c                 H    U R                  XUS9nU R                  U5      nU$ )zForward pass.

Args:
    x: Input tensor.
    attn_mask: Optional attention mask for masked attention
    is_causal: If True, use causal (autoregressive) masking in attention.

Returns:
    Output tensor.
)rm   rn   )r  r  )r\   r   rm   rn   s       r`   r   Eva.forward,  s.      !!!I!Na rb   )r0  r.  r%  r#  r  r   r1  r/  r   r  r3  r2  r  r  r   r   rY   r+  r   r  r5   r(  r   r'  r&  r$  rl   r)  )Tr   Frf  )NN)NFFFr  FNF)r%   FT)NF)+r   r   r   r   r   r   r   r   r
   r   r   r   r   r   rF   r4  rF  rI   ModulerE  rP   r   ignorer	   rX  r]  r   r   rc  rg  rj  rp  r   r  r   r  r  r  r  r  r   r   r   r   s   @r`   r+   r+     sG    5868#$ !"!$#$#%*"!#%%'$&$&$&#,+/ $"##($($)',&(&*&,%*"'-28<*.1537%*$)DH%*__-CsCx01_- c5c?23_- 	_-
 _- _- _- _- _- _- _- _- _- !_- _-  #!_-" #_-$ %_-& !'_-( #)_-* "+_-, "-_-. "/_-0 !1_-2 "%3_-4 5_-6  7_-8 !9_-: ";_-< "=_->  }?_-@ $A_-B !$C_-D $E_-F #G_-H  I_-J '+K_-L (0~M_-N "$O_-P "*#Q_-R "*%S_-T #U_-V "W_-X %U5c?C+?%@AY_-Z #[_- _-B: :"1!ryy !t !t ! YYS   YY)T )T ) ) YYD T#s(^   YY		  
aC 
ahsm 
aW[ 
a 3748DuS#X/D !sCx1D 
	D:5 uU\\8ELL3I%IJ 5 t 8<).$$',04#K ||K  eCcN34K  #'	K 
 K  K  K  !%K   -K  K  
tELL!5tELL7I)I#JJ	KK ^ ./$#	3S	>*  	$ell x}   15#	%||%  -% 	%
 
%N1ell 1 1 1$ 15#	||  - 	
 
 rb   
state_dictmodelprefixr   c                    U R                  SU 5      n U R                  5        VVs0 s H  u  p4UR                  SS5      U_M     n nn0 n/ SQn[        U5      nU R                  5        GH  u  p4U(       a  UR	                  U5      (       d  M%  X7S nU H  nUR                  US   US   5      nM     UR	                  S5      (       a  UR                  S	S5      nUR                  S
S5      nUR                  SS5      nUR	                  S5      (       a`  UR
                  S   S-  n	UR                  S5      (       a  USU	 US'   XIS US'   O%UR                  S5      (       a  USU	 US'   XIS US'   GM  OUS:X  a;  SnUR                  SS5      n[        R                  " UR
                  S   5      US'   O@US:X  a#  SnUR                  S5      R                  S5      nOUS:X  a  UR                  S5      nXEU'   GM     U$ s  snnf )zConvert Perception Encoder weights.

Args:
    state_dict: State dictionary to convert.
    model: Target model instance.
    prefix: Prefix to strip from keys.

Returns:
    Converted state dictionary.
r  zmodule.r  ))conv1patch_embed.proj)positional_embeddingr&  )ztransformer.resblocks.r  )ln_prer+  )ln_postrY   )ln_rY   )z
ls_1.gammar   )z
ls_2.gammar   )in_proj_zqkv.)out_projrZ   )zmlp.c_fcmlp.fc1)z
mlp.c_projmlp.fc2Nr   r%   r0  zattn_pool.attnzattn_pool.layernormzattn_pool.normzattn_pool.probezattn_pool.latentzattn_pool.qkvrA   rp   zattn_pool.q.weightzattn_pool.kv.weightrB   zattn_pool.q.biaszattn_pool.kv.biasrZ   zhead.weightz	head.biasclass_embeddingr#  r&  )getitemsreplacer  
startswithry   endswithr   rP   zerosrz  )
r  r  r  r   r   out_dictswaps
len_prefixspr0   s
             r`   _convert_per  A  s    4J:D:J:J:LM:L$!!))Ir*A-:LJMHE VJ  "<<''+AB		"Q%A'A  <<$$		*K8A		/1ABA		+-?@A||O,,ggajAo::h''56tWH1267gH23ZZ''34Tc7H/045dGH01 - &[AAq!A$)KK
$;H[!##AA((+A+AA? #B Og Ns   Hinterpolation	antialiasc           
      x   0 nU R                  SU 5      n U R                  SU 5      n U R                  SU 5      n U R                  SU 5      n SU ;   a  [        X5      $ SU ;   a
  [        XSS9$ S	U ;   a  S
nOSU ;   a  SnOSnSU ;   nU(       + =(       a    US-   U ;   nUS-   U ;   n[        U5      n	U R                  5        GH  u  pU(       a  U
R	                  U5      (       d  M%  XS n
SU
;   a  U
S:X  d  M7  U(       a  [        S Vs/ s H  oR                  U5      PM     sn5      (       a  Mo  U
R	                  S5      (       a  M  U
R                  S5      (       aR  U
R                  SS5      n UR                  U5        UR                  SSS9u  nnnXU'   UXJR                  SS5      '   M  U
R                  SS5      n
U
R                  SS5      n
U
R                  SS5      n
O/U(       a(  U
S ;   a"  U
S!:X  d  U
S":X  a  U
R                  S#S$5      n
OGMU  S%U
;   ae  UR                  R                  R                  R                  u    nnnUR                  S   U:w  d  UR                  S&   U:w  a  [!        UUU4UUS'S(9nOsU
S):X  am  UR                  S*   UR"                  R                  S*   :w  aC  [%        US+S,5      (       a  S-O[%        US.S*5      n['        UUR                  R(                  UUUS'S/9nU
R                  S0S15      n
U
R                  S2S35      n
U
R                  S4S55      n
U
R                  S6S75      n
U
R                  S8S95      n
U
R                  S:S;5      n
U(       a$  U
R                  SS<5      n
U
R                  SS=5      n
XU
'   GM     U$ s  snf ! [         a  n[        U5         SnAGM  SnAff = f)>a:  Convert patch embedding weight from manual patchify + linear proj to conv.

Args:
    state_dict: Checkpoint state dictionary.
    model: Target model instance.
    interpolation: Interpolation method for resizing.
    antialias: Whether to use antialiasing when resizing.

Returns:
    Filtered state dictionary.
	model_emar  r  r  zvisual.conv1.weightzconv1.weightr  )r  zvisual.trunk.pos_embedzvisual.trunk.zvisual.pos_embedvisual.storage_tokens
mask_tokenzblocks.0.attn.q_proj.weightNrl   z
rope.freqs)z.periodsz
.bias_maskr  local_cls_normzqkv.biasrR   rA   rq   ru   rT   z	ls1.gammar   z	ls2.gammar   r$  )r  zlm_head.weightzlm_head.biasnorm.weight	norm.biasr  r  rY   r1  zpatch_embed.proj.weightrx   T)r  r  ro  r&  r%   r   Fr   r5   )rm  r5   r  r  ro  z
mlp.ffn_lnzmlp.normzattn.inner_attn_lnz	attn.normzmlp.w12r  zmlp.w1z	mlp.fc1_gzmlp.w2z	mlp.fc1_xzmlp.w3r  zq_proj.biaszv_proj.bias)r  r  r  r  r  anyr  r  get_parameter	Exceptionprintchunkr   rZ   rp   ry   r   r&  r   r   r*  )r  r  r  r  r  r  dinov3_weightsmim_weightsno_qkvr  r   r   fq_bias_keqvkvvvr  r{  r|  r5   s                         r`   checkpoint_filter_fnr    s   " HZ8J4J*5Jj9J 
*:--	:	%:R88  :- 	z	)%3N$$L,)>*)LK33zAFVJ  "<<''+AQ;qL0+ST+SaJJqM+STUU||,--zz*%%99Z: ''1 WWQBW/
B%'"<>:x89		+y1A		+y1A		*K8AQ"nnM!Q+%5IIfi0 $)**//66<<JAq!Qwwr{a1772;!#3(F"/'  +!''!*0E0Ea0H"H%,U4De%L%LRYZ_atvwRx&**44"3+#A IIlJ/II*K8IIi+IIh,IIh,IIh	*		(M2A		(M2AY #\ OG U ! !Hs   N
.N
N9"N44N9variant
pretrainedc           	      
   UR                  SS5      n[        R                  R                  SS5      S:H  nUc  UnU(       a  SSKJn  U" X40 UD6$ UR                  SS	5      n[        [        X4[        [        US
S9S.UD6nU$ )zCreate an EVA model.

Args:
    variant: Model variant name.
    pretrained: Load pretrained weights.
    **kwargs: Additional model arguments.

Returns:
    Instantiated Eva model.

use_naflexNTIMM_USE_NAFLEX01r%   )_create_naflexvit_from_evaout_indicesrA   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)
poposenvironr  	naflexvitr  r&   r+   r  r  )r  r  r   r  _USE_NAFLEX_DEFAULTr  r  r  s           r`   _create_evar    s     L$/J**..):C@CG(
9)'HHH**]A.K W1[hG 	E Lrb   urlc                 4    U SSSSSS[         [        SSS	S
.UE$ )zGenerate default configuration for EVA models.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   rA   r   r   Ng?bicubicTr  r3  mitr  r   
input_size	pool_sizecrop_pctr  fixed_input_sizemeanrC  
first_conv
classifierlicense)r   r   r  r   s     r`   _cfgr  "  s6     =t ( # rb   c                 $    U SSSSSSSSSS	S
S.UE$ )zGenerate default configuration for Perception Encoder models.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   r  N      ?r  T      ?r  r  r  r3  
apache-2.0r  r   r  s     r`   _pe_cfgr
  6  s6     D)( $* rb   c                 4    U SSSSSS[         [        SSS	S
.UE$ )a  Generate default configuration for DINOv3 models.

Note: Original DINOv3 uses CLS-token pooling for representations. timm defaults to avg
pooling for the Eva architecture. Pass global_pool='token' at model creation to match
upstream behavior, which may be preferred for tasks like retrieval and few-shot classification.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Model configuration dictionary.
r   rA      r  Nr  r  Tr  r3  zdinov3-licenser  )r   r   r  s     r`   _dinov3_cfgr  J  s7     D)%.B(# (. rb   z"eva_giant_patch14_224.clip_ft_in1kztimm/)	hf_hub_idz"eva_giant_patch14_336.clip_ft_in1k)rA   P  r  r  squash)r  r  r  	crop_modez(eva_giant_patch14_336.m30m_ft_in22k_in1k)r  r  rC  r  r  r  z(eva_giant_patch14_560.m30m_ft_in22k_in1k)rA   0  r  z.eva02_base_patch14_448.mim_in22k_ft_in22k_in1k)rA     r  z/eva02_large_patch14_448.mim_in22k_ft_in22k_in1kz.eva02_large_patch14_448.mim_m38m_ft_in22k_in1kz(eva02_tiny_patch14_336.mim_in22k_ft_in1k)r  r  r  z)eva02_small_patch14_336.mim_in22k_ft_in1kz(eva02_base_patch14_448.mim_in22k_ft_in1kz)eva02_large_patch14_448.mim_in22k_ft_in1kz(eva02_large_patch14_448.mim_m38m_ft_in1kz)eva02_base_patch14_448.mim_in22k_ft_in22kiQU  )r  r  r  r  r   z*eva02_large_patch14_448.mim_in22k_ft_in22kz)eva02_large_patch14_448.mim_m38m_ft_in22kz eva02_tiny_patch14_224.mim_in22k)r  r   z!eva02_small_patch14_224.mim_in22kz eva02_base_patch14_224.mim_in22kz!eva02_large_patch14_224.mim_in22kz eva02_large_patch14_224.mim_m38mz$eva_giant_patch14_clip_224.laion400m   z#eva_giant_patch14_clip_224.merged2bz$eva02_base_patch16_clip_224.merged2b   z%eva02_large_patch14_clip_224.merged2br   z%eva02_large_patch14_clip_336.merged2b)r  r  r  r   z'eva02_enormous_patch14_clip_224.laion2bz,eva02_enormous_patch14_clip_224.laion2b_plusz(eva02_enormous_patch14_clip_224.pretrain)r   z-vit_medium_patch16_rope_reg1_gap_256.sbb_in1kr  gffffff?r  )r  r  r  r  rC  z.vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1kz.vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1kz+vit_base_patch16_rope_reg1_gap_256.sbb_in1kzvit_pe_core_tiny_patch16_384.fb)rA     r  )r  r  r   z vit_pe_core_small_patch16_384.fbzvit_pe_core_base_patch16_224.fbr  z vit_pe_core_large_patch14_336.fbz#vit_pe_core_gigantic_patch14_448.fb   z vit_pe_lang_large_patch14_448.fbz'vit_pe_lang_large_patch14_448.fb_tilingz#vit_pe_lang_gigantic_patch14_448.fbz*vit_pe_lang_gigantic_patch14_448.fb_tilingz"vit_pe_spatial_tiny_patch16_512.fb)rA   r  r  z#vit_pe_spatial_small_patch16_512.fbz"vit_pe_spatial_base_patch16_512.fbz#vit_pe_spatial_large_patch14_448.fbz&vit_pe_spatial_gigantic_patch14_448.fbz%vit_small_patch16_rope_224.naver_in1kr	  )r  r  rC  r  z$vit_base_patch16_rope_224.naver_in1kz%vit_large_patch16_rope_224.naver_in1kz+vit_small_patch16_rope_mixed_224.naver_in1kz*vit_base_patch16_rope_mixed_224.naver_in1kz+vit_large_patch16_rope_mixed_224.naver_in1kz)vit_small_patch16_rope_ape_224.naver_in1kz(vit_base_patch16_rope_ape_224.naver_in1kz)vit_large_patch16_rope_ape_224.naver_in1kz/vit_small_patch16_rope_mixed_ape_224.naver_in1kz.vit_base_patch16_rope_mixed_ape_224.naver_in1kz/vit_large_patch16_rope_mixed_ape_224.naver_in1kz!vit_small_patch16_dinov3.lvd1689mz&vit_small_patch16_dinov3_qkvb.lvd1689mz&vit_small_plus_patch16_dinov3.lvd1689mz+vit_small_plus_patch16_dinov3_qkvb.lvd1689mz vit_base_patch16_dinov3.lvd1689mz%vit_base_patch16_dinov3_qkvb.lvd1689mz!vit_large_patch16_dinov3.lvd1689mz&vit_large_patch16_dinov3_qkvb.lvd1689mz vit_large_patch16_dinov3.sat493m)gQ?gM?gl?)g$C?g+?gM?)r  r  rC  z%vit_large_patch16_dinov3_qkvb.sat493m)z%vit_huge_plus_patch16_dinov3.lvd1689mz*vit_huge_plus_patch16_dinov3_qkvb.lvd1689mzvit_7b_patch16_dinov3.lvd1689mzvit_7b_patch16_dinov3.sat493mc           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	,EVA-g model https://arxiv.org/abs/2211.07636     (   r   tE]t@r   r   r   r1   r   r  )eva_giant_patch14_224r  r  r  r   
model_argsr  s       r`   r   r     9     t2WbcJeJe$zJd]cJdeELrb   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	r  r  r  r  r   r  r  r  )eva_giant_patch14_336r!  r"  s       r`   r&  r&    r$  rb   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	r  r  r  r  r   r  r  r  )eva_giant_patch14_560r!  r"  s       r`   r(  r(    r$  rb   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )+EVA02 Tiny https://arxiv.org/abs/2303.11331r   r     r   rA   UUUUUU@Tr   r   	r   r   r   r   r1   r   r   r   r  r  )eva02_tiny_patch14_224r!  r"  s       r`   r/  r/    N     
J fZf4PZKe^dKefELrb   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ ),EVA02 Small https://arxiv.org/abs/2303.11331r   r  r  r      r,  Tr-  r.  r  )eva02_small_patch14_224r!  r"  s       r`   r4  r4     N     
J gjgDQ[Lf_eLfgELrb   c                 Z    [        SSSSSSSSSSSS	9n[        SS
U 0[        U40 UD6D6nU$ )+EVA02 Base https://arxiv.org/abs/2303.11331r   r  r   r   Fr,  Tr-  r   r   r   r   r1   r3   r   r   r   r   r  r  )eva02_base_patch14_224r!  r"  s       r`   r9  r9  2  T     J fZf4PZKe^dKefELrb   c                 Z    [        SSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ ),EVA02 Large https://arxiv.org/abs/2303.11331r   r  r     r   r,  FTr-  r   r   r   r   r1   r   r3   r   r   r   r  r  )eva02_large_patch14_224r!  r"  s       r`   r?  r?  F  T     J gjgDQ[Lf_eLfgELrb   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )r*  r  r  r+  r   rA   r,  Tr-  r.  r  )eva02_tiny_patch14_336r!  r"  s       r`   rB  rB  Z  r0  rb   c                 V    [        SSSSSSSSSS	9	n[        SS
U 0[        U40 UD6D6nU$ )r2  r  r  r  r   r3  r,  Tr-  r.  r  )eva02_small_patch14_336r!  r"  s       r`   rD  rD  l  r5  rb   c                 Z    [        SSSSSSSSSSSS	9n[        SS
U 0[        U40 UD6D6nU$ )r7  r  r  r   r   Fr,  Tr-  r8  r  )eva02_base_patch14_448r!  r"  s       r`   rF  rF  ~  r:  rb   c                 Z    [        SSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ )r<  r  r  r  r=  r   r,  FTr-  r>  r  )eva02_large_patch14_448r!  r"  s       r`   rH  rH    r@  rb   c                 p    [        SSSSSUR                  SS5      S9n[        S
S	U 0[        U40 UD6D6nU$ )z?EVA-g CLIP model (only difference from non-CLIP is the pooling)r  r  r  r   r  r   r	  )r   r   r   r1   r   r   r  )eva_giant_patch14_clip_224r  r  r  r"  s       r`   rJ  rJ    sL     R2JJ}g68J jjtT^OibhOijELrb   c                 ~    [        SSSSSSSSSSSSUR                  S	S
5      S9n[        SSU 0[        U40 UD6D6nU$ )zUAn EVA-CLIP specific variant that adds additional attn scale layer-norm to eva02_baser   r   r   r   Fr,  Tr-  r   r	  )r   r   r   r   r1   r3   r   r   r   r   r   r  r   r  )eva02_base_patch16_clip_224rK  r"  s       r`   rM  rM    sf     JJ}g6J k*kPTU_PjciPjkELrb   c                 ~    [        SSSSSSSSSSSS	UR                  S
S5      S9n[        SSU 0[        U40 UD6D6nU$ )VAn EVA-CLIP specific variant that adds additional attn scale layer-norm to eva02_larger   r  r  r=  r   r,  FTr-  r   r	  r   r   r   r   r1   r   r3   r   r   r   r   r  r   r  )eva02_large_patch14_clip_224rK  r"  s       r`   rQ  rQ    f     JJ}g6J l:lQUV`QkdjQklELrb   c                 ~    [        SSSSSSSSSSSS	UR                  S
S5      S9n[        SSU 0[        U40 UD6D6nU$ )rO  r  r  r  r=  r   r,  FTr-  r   r	  rP  r  )eva02_large_patch14_clip_336rK  r"  s       r`   rT  rT    rR  rb   c                 t    [        SSSSSSSUR                  SS	5      S
9n[        SSU 0[        U40 UD6D6nU$ )zCAn EVA-CLIP specific variant that uses residual post-norm in blocksr   r  i   @   r   gI$I$!@Tr   r	  )r   r   r   r   r1   r   r   r   r  )eva02_enormous_patch14_clip_224rK  r"  s       r`   rW  rW    sW     JJ}g6	J ojoTXYcTngmTnoELrb   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )timm SBB ViT with ROPEr  r   r  r   r   Th㈵>Fr%   r-  r   r   r   r   r1   r3   r2   r   r   r   r   r   r  r  )$vit_medium_patch16_rope_reg1_gap_256r!  r"  s       r`   r\  r\    s[     J t:tY]^hYslrYstELrb   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )rY  r  r   r     r   TFrZ  r%   r-  r[  r  )%vit_mediumd_patch16_rope_reg1_gap_256r!  r"  s       r`   r_  r_    s[     J uJuZ^_iZtmsZtuELrb   c                 ^    [        SSSSSSSSSS	SSS
S9n[        SSU 0[        U40 UD6D6nU$ )rY  r  r   i  r   
   TrZ  Frs   r-  r[  r  )%vit_betwixt_patch16_rope_reg4_gap_256r!  r"  s       r`   rb  rb  /  s[     J uJuZ^_iZtmsZtuELrb   c                 ^    [        SSSSSSSSSSSSS	S
9n[        SSU 0[        U40 UD6D6nU$ )rY  r  r   r   r   TrZ  Fr%   r-  r[  r  )"vit_base_patch16_rope_reg1_gap_256r!  r"  s       r`   rd  rd  E  s[     J rrW[\fWqjpWqrELrb   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )HPerception Encoder (PE) ViT from Meta (https://arxiv.org/abs/2504.13181)r   r+  r   rA   r   r
  rl   Tr=  r=  r  xyr   rZ  epsr   r   r   r1   r   r   r   r   r   r  r   r   r   r   r9   r  )vit_pe_core_tiny_patch16_384r  r   r   r  r  r   r#  s      r`   rl  rl  [  sd     !%9$/J$ k*kPTU_PjciPjkkrb   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r   r  r   r3  r   r
  rl   Trg  r  rh  r   rZ  ri  rk  r  )vit_pe_core_small_patch16_384rm  rn  s      r`   rp  rp  t  sd     !%9$/J$ l:lQUV`QkdjQkllrb   c                 v    [        SSSSSSSSSSS	S
SS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r   r   r   r   r
  rl   T)r  r  r  rh  r   rZ  ri  rk  r  )vit_pe_core_base_patch16_224rm  rn  s      r`   rr  rr    sd     !%9$/J$ k*kPTU_PjciPjkkrb   c                 v    [        SSSSSSSSSS	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r  r  r=  r   r   r
  rl   Trg  r  rh  r   rZ  ri  rk  r  )vit_pe_core_large_patch14_336rm  rn  s      r`   rt  rt    sd     !%9$/J$ l:lQUV`QkdjQkllrb   c                 v    [        SSSSSSSSS	S	S
SSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r     2   r   UUUUUU@r
  rl   FT    rz  rh  r   r   rZ  ri  )r   r   r   r1   r   r   r   r   r   r   r  r   r   r   r9   r  ) vit_pe_core_gigantic_patch14_448rm  rn  s      r`   r{  r{    sd     !%9$/J$ ojoTXYcTngmTnoorb   c           
          [        S 0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S!SU 0[        U40 UD6D6$ )"rf  r   r  r   r  r      r1   r   r   r   r   rl   r   Tr   r  ry  r   r  r   rh  r   r   Fr   r   皙?r9   rZ  ri  r  r   )vit_pe_lang_large_patch14_448rm  rn  s      r`   r  r    s        	
          "& #(    9$/!J& l:lQUV`QkdjQkllrb   c                 v    [        SSSSSSSSS	S
SSSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r  rv  /   r   rx  rl   FTry  rh  r~  rZ  ri  r   r   r   r1   r   r   r   r   r  r   r   r   r   r   r9   r  ) vit_pe_lang_gigantic_patch14_448rm  rn  s      r`   r  r    sd     !%"'9$/J$ ojoTXYcTngmTnoorb   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r   r+  r   rA   r   rl   TFry  r  rh  rZ  ri  r   r   r   r1   r   r   r   r   r   r   r  r   r   r9   r  )vit_pe_spatial_tiny_patch16_512rm  rn  s      r`   r  r  	  sa     !%"'9$/J" nZnSWXbSmflSmnnrb   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r   r  r   r3  r   rl   TFry  r  rh  rZ  ri  r  r  ) vit_pe_spatial_small_patch16_512rm  rn  s      r`   r  r  	  sa     !%"'9$/J" ojoTXYcTngmTnoorb   c                 t    [        SSSSSSSSSSSS	S
[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r   r   r   r   rl   TFry  r  rh  rZ  ri  r  r  )vit_pe_spatial_base_patch16_512rm  rn  s      r`   r  r  3	  sa     !%"'9$/J" nZnSWXbSmflSmnnrb   c                 t    [        SSSSSSSSSSS	S
S[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r  r  r=  r   r   rl   TFry  r  rh  rZ  ri  r  r  ) vit_pe_spatial_large_patch14_448rm  rn  s      r`   r  r  J	  sa     !%"'9$/J" ojoTXYcTngmTnoorb   c                 v    [        SSSSSSSSS	S
SSSS[        [        SS9S9n[        SSU 0[        U40 UD6D6$ )rf  r  rv  rw  r   rx  rl   FTry  rh  r~  rZ  ri  r  r  )#vit_pe_spatial_gigantic_patch14_448rm  rn  s      r`   r  r  a	  sd     !%"'9$/J$ rrW[\fWqjpWqrrrb   c                 `    [        SSSSSSSSSS	S
SSSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r3  rs   rl   TrZ  r	  Frh        Y@r   r   r   r1   r   r   r2   r   r   r   r   r   r   r   r  )vit_small_patch16_rope_224r!  r"  s       r`   r  r  z	  s]     J  jjtT^OibhOijELrb   c                 b    [        SSSSSSSSSSS	SSS
SS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rs   rl   FTrZ  r	  rh  r  )r   r   r   r1   r   r   r   r2   r   r   r   r   r   r   r   r  )vit_base_patch16_rope_224r!  r"  s       r`   r  r  	  s`     J" i
idS]NhagNhiELrb   c                 `    [        SSSSSSSSSSS	SS
SS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Axial ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r=  rs   rl   TrZ  r	  Frh  r  r  r  )vit_large_patch16_rope_224r!  r"  s       r`   r  r  	  s]     J  jjtT^OibhOijELrb   c                 b    [        SSSSSSSSSS	S
SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r3  rs   rl   TrZ  r	  Frh        $@r  r   r   r   r1   r   r   r2   r   r   r   r   r   r   r   r   r  ) vit_small_patch16_rope_mixed_224r!  r"  s       r`   r  r  	  sa     J" pzpUYZdUohnUopELrb   c                 b    [        SSSSSSSSSSS	SS
SSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rs   Trl   rZ  r	  Frh  r  r  )r   r   r   r1   r   r2   r   r   r   r   r   r   r   r   r   r  )vit_base_patch16_rope_mixed_224r!  r"  s       r`   r  r  	  sa     J" ojoTXYcTngmTnoELrb   c                 b    [        SSSSSSSSSSS	SS
SSS9n[        SSU 0[        U40 UD6D6nU$ )z=RoPE-Mixed ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r=  rs   rl   TrZ  r	  Frh  r  r  r  r  ) vit_large_patch16_rope_mixed_224r!  r"  s       r`   r  r  	  sa     J" pzpUYZdUohnUopELrb   c                 b    [        SSSSSSSSSS	SSSS
SS9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-S/16 from https://github.com/naver-ai/rope-vitr   r  r   r3  rs   rl   TrZ  r	  rh  r  r   r   r   r1   r   r   r2   r   r   r   r   r   r   r   r   r  )vit_small_patch16_rope_ape_224r!  r"  s       r`   r  r  	
  sa     J" nZnSWXbSmflSmnELrb   c                 b    [        SSSSSSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   rs   rl   TrZ  r	  rh  r  r  r  )vit_base_patch16_rope_ape_224r!  r"  s       r`   r  r  !
  sa     J$ mJmRVWaRlekRlmELrb   c                 b    [        SSSSSSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Axial + APE ViT-L/16 from https://github.com/naver-ai/rope-vitr   r  r=  rs   rl   TrZ  r	  rh  r  r  r  )vit_large_patch16_rope_ape_224r!  r"  s       r`   r  r  :
  sa     J$ nZnSWXbSmflSmnELrb   c           	          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ ) zCRoPE-Mixed + APE ViT-S/16 from https://github.com/naver-ai/rope-vitr   r   r   r  r   r   r1   r3  r   rs   r   rl   r2   Tr   rZ  r   r   r	  r   r   r   r   rh  r   r  r   r  r  r   )$vit_small_patch16_rope_mixed_ape_224r!  r"  s       r`   r  r  S
  s        	
              !J& t:tY]^hYslrYstELrb   c           	          [        S0 SS_SS_SS_SS_SS	_S
S_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Mixed + APE ViT-B/16 from https://github.com/naver-ai/rope-vitr   r   r   r   r   r   r1   r   rs   r   rl   r2   Tr   rZ  r   r   r	  r   r   r   r   rh  r   r  r   r  r  r   )#vit_base_patch16_rope_mixed_ape_224r!  r"  s       r`   r  r  m
  s        	
              !J$ s*sX\]gXrkqXrsELrb   c           	          [        S0 SS_SS_SS_SS_SS	_S
S_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_6n[        SSU 0[        U40 UD6D6nU$ )zCRoPE-Mixed + APE ViT-L/16 from https://github.com/naver-ai/rope-vitr   r   r   r  r   r=  r1   r   rs   r   rl   r2   Tr   rZ  r   r   r	  r   r   r   r   rh  r   r  r   r  r  r   )$vit_large_patch16_rope_mixed_ape_224r!  r"  s       r`   r  r  
  s        	
              !J$ t:tY]^hYslrYstELrb   c                 z    [        SSSSSSSSS	SSSS
S[        [        SS9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 S/16 https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr  r   r3  FrZ  dinov3d   rs   ri  r   r  r   r   r1   r2   r   r   r   r   r   r   r   r   r9   r  )vit_small_patch16_dinov3rm  r"  s       r`   r  r  
  sg    
 9$/!J$ hzhTR\Mg`fMghELrb   c                 z    [        SSSSSSSSSSSS	S
S	[        [        SS9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 S/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr  r   r3  rZ  r  r  Frs   ri  r  r  )vit_small_patch16_dinov3_qkvbrm  r"  s       r`   r  r  
  sh    
 9$/!J$ mJmRVWaRlekRlmELrb   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zDINOv3 S/16 Plus https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   r   r  Tr   r  r   r   r1   r3  r2   Fr   rZ  r   r  r   r  r   r   r   r   r   r   r   rs   r   r9   ri  r  r   )vit_small_plus_patch16_dinov3rm  r"  s       r`   r  r  
  s    
    	
            !" #$ 9$/%J( mJmRVWaRlekRlmELrb   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zDINOv3 S/16 Plus w/ QKV bias enabled (but 0) https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   r   r  Tr   r  r   r   r1   r3  r2   r   rZ  r   r  r   r  r   r   r   Fr   r   r   r   rs   r   r9   ri  r  r   )"vit_small_plus_patch16_dinov3_qkvbrm  r"  s       r`   r  r  
  s    
    	
            !" #$ 9$/%J( rrW[\fWqjpWqrELrb   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 B/16 https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr   r   FrZ  r  r  rs   ri  r  r  )vit_base_patch16_dinov3rm  r"  s       r`   r  r    sg    
 9$/!J$ gjgDQ[Lf_eLfgELrb   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 B/16 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr   r   rZ  r  r  Frs   ri  r  r  )vit_base_patch16_dinov3_qkvbrm  r"  s       r`   r  r  *  sh    
 9$/!J$ l:lQUV`QkdjQklELrb   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 L/16 https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr  r=  FrZ  r  r  rs   ri  r   r  r   r   r1   r2   r   r   r   r   r   r   r   r   r9   r  )vit_large_patch16_dinov3rm  r"  s       r`   r  r  E  sg    
 9$/!J$ hzhTR\Mg`fMghELrb   c                 z    [        SSSSSSSSSSSSS	S[        [        SS
9S9n[        SSU 0[        U40 UD6D6nU$ )zDINOv3 w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   Tr  r=  rZ  r  r  Frs   ri  r  r  )vit_large_patch16_dinov3_qkvbrm  r"  s       r`   r  r  `  sh    
 9$/!J$ mJmRVWaRlekRlmELrb   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zDINOv3 H/16 Plus https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   r   r  Tr   r  r   rz  r1   r^  r2   Fr   rZ  r   r  r   r  r   r   r   r   r   r   r   rs   r   r9   ri  r  r   )vit_huge_plus_patch16_dinov3rm  r"  s       r`   r  r  {  s    
    	
            !" #$ 9$/%J* l:lQUV`QkdjQklELrb   c           
          [        S0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S SU 0[        U40 UD6D6nU$ )!zDINOv3 H/16 Plus w/ QKV bias enabled (but zero) https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   r   r  Tr   r  r   rz  r1   r^  r2   r   rZ  r   r  r   r  r   r   Fr   r   r   r   r   rs   r   r9   ri  r  r   )!vit_huge_plus_patch16_dinov3_qkvbrm  r"  s       r`   r  r    s    
    	
            !" #$ 9$/%J* q
qVZ[eVpioVpqELrb   c           
          [        S!0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S[        [        SS9_6n[        S"S U 0[        U40 UD6D6nU$ )#zDINOv3 7B/16 https://arxiv.org/abs/2508.10104
NOTE: Pass global_pool='token' to use CLS-token pooling (matches upstream DINOv3).
r   r   r  Tr   i   r   r  r1   rz  r2   Fr   rr   r   rZ  r   r  r   r  r   r   r   r   r   rV  r   rs   r   r9   ri  r  r   )vit_7b_patch16_dinov3rm  r"  s       r`   r  r    s    
    	
           " #$ %& 9$/'J, eJe$zJd]cJdeELrb   )r  )r  Tr  )r  )r   rM  r  	functoolsr   typingr   r   r   r   r   r	   r
   r   rP   torch.nnrI   torch.nn.functional
functionalr{   	timm.datar   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   _builderr&   	_featuresr'   _manipulater(   	_registryr)   r*   __all__r  r-   r   r   r+   r   r   r  r   r  r  r  r
  r  default_cfgsr   r&  r(  r/  r4  r9  r?  rB  rD  rF  rH  rJ  rM  rQ  rT  rW  r\  r_  rb  rd  rl  rp  rr  rt  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   rb   r`   <module>r     sl  =@  	  I I I     d d     , + + # <'a299 aHAryy AHqryy qhy")) y~ CS%,,&'C99C C 
#u||
	CR '	yell*+yyyy y 	y
 
#u||
yx $ S >c T#s(^ ( S#X (S DcN . % R& )$+R& )$ 3(+DR& /"(< 3(	1DR&$ /"(< 3(	1D%R&2 5d 3(73R&< 6t 3(8=R&F 5d 3(7GR&T / 31UR&^ 0 32_R&h / 31iR&r 0 32sR&| / 31}R&J 0 3(PU2KR&T 1$ 3(PU3UR&^ 0 3(PU2_R&l ')mR&v (*wR&@ ')AR&J (*KR&T ')UR&b +D -cR&p *4 ,qR&~ +D -R&L ,T .MR&Z ,T  3.[R&j .t 0kR&x 3D 5yR&F /1GR&P 4T 4/6QR&Z 5d 4/7[R&d 5d 47eR&l 24 4/4mR&z &w !({R&H ' !)IR&V &w !(WR&d ' !)eR&r *7 !,sR&B ' !)CR&P .w !0QR&^ *7 !,_R&l 1' !3mR&| )' !+}R&J *7 !,KR&X )' !+YR&f *7 !,gR&t -g !/uR&F	 ,T"(<.G	R&P	 +D"(<-Q	R&Z	 ,T"(<.[	R&d	 24"(<4e	R&n	 1$"(<3o	R&x	 24"(<4y	R&B
 0"(<2C
R&L
 /"(<1M
R&V
 0"(<2W
R&`
 6t"(<8a
R&j
 5d"(<7k
R&t
 6t"(<8u
R&H (*IR&N -k/OR&T -k/UR&Z 2;4[R&` ')aR&f ,[.gR&l (*mR&r -k/sR&x '"(=)yR&@ ,["(=.AR&H .9. 3>3 '2' &1"(=&[R& Rj d    d    d    t #  "  3  " t #  &  3  & t #  "  3  " t #  &  3  & 4 c   D s  * T   * T   *  3    T PS  * d QT  * d QT  * 4 c  * lT l l l0 md m m m. lT l l l. md m m m. p pC p p. md m m m0 p pC p p. o o3 o o, p pC p p, o o3 o o, p pC p p, sD ss s s0 4 c  , $ S  . 4 c  ,  C  .  3  .  C  0 t #  . d   0 t #  0 T PS  2 D s  0 T PS  0  C  4 d   4 d   8 4 c  8  3  4 T   4  C  4 d   4 T   : $ S  8 d   rb   