
    3juf                    d   S SK Jr  S SKrS SKJrJrJr  S SKrS SKJ	s  J
r  S SKJ	r	  S SKJr  S SKJr  S SKJrJrJrJr  S!S jr " S	 S
\	R,                  5      r " S S\	R,                  5      r " S S\	R2                  5      r " S S\	R2                  5      r " S S\	R2                  5      r " S S\	R,                  5      r " S S\	R2                  5      r " S S\	R2                  5      r " S S\	R2                  5      r  " S S\	R2                  5      r!S"S jr"S#S$S jjr#S#S$S jjr$S#S$S  jjr%g)%    )annotationsN)AnyOptionalSequence)nn)
checkpoint)KORNIA_CHECK)DropPathLayerNorm2dwindow_partitionwindow_unpartitionc                6    [        U [        5      (       a  X 4$ U $ N)
isinstanceint)xs    P/home/wildlama/miniconda3/lib/python3.13/site-packages/kornia/models/tiny_vit.py
_make_pairr   #   s    3''A6.Q.    c                  n   ^  \ rS rSrSrSSS\R                  4               SU 4S jjjrSrU =r	$ )ConvBN'   a  Implement a sequential block containing a convolution followed by Batch Normalization.

Args:
    in_channels: The number of input channels.
    out_channels: The number of output channels.
    kernel_size: The size of the convolving kernel. Default: 1.
    stride: The stride of the convolution. Default: 1.
    padding: The zero-padding added to both sides of the input. Default: 0.
    groups: The number of blocked connections from input to output. Default: 1.
   r   c           
        > [         TU ]  5         [        R                  " XX4XVSS9U l        [        R
                  " U5      U l        U" 5       U l        g )NF)groupsbias)super__init__r   Conv2dcBatchNorm2dbnact)	selfin_channelsout_channelskernel_sizestridepaddingr   
activation	__class__s	           r   r   ConvBN.__init__3   s@     	;k7hmn...<r   )r#   r"   r    )r%   r   r&   r   r'   r   r(   r   r)   r   r   r   r*   type[nn.Module]returnNone)
__name__
__module____qualname____firstlineno____doc__r   Identityr   __static_attributes____classcell__r+   s   @r   r   r   '   sr    	  &(kk     	 
       $  
   r   r   c                  J   ^  \ rS rSrSr\R                  4SU 4S jjjrSrU =r	$ )
PatchEmbedC   aX  Perform patch embedding using a series of convolutions.

This module divides the input image into patches and projects them into
the embedding dimension.

Args:
    in_channels: The number of input image channels.
    embed_dim: The dimension of the resulting embeddings.
    activation: The activation layer to use. Default: :class:`nn.GELU`.
c                   > [         TU ]  5         [        R                  " [	        XS-  SSS5      U" 5       [	        US-  USSS5      5      U l        g )N      r   )r   r   r   
Sequentialr   seq)r$   r%   	embed_dimr*   r+   s       r   r   PatchEmbed.__init__O   sL    ==;Q1a8*,y\]~_hjkmnpqHr
r   )r@   )r%   r   rA   r   r*   r-   r.   r/   
r0   r1   r2   r3   r4   r   GELUr   r6   r7   r8   s   @r   r:   r:   C   s    	 XZW^W^ 
 
r   r:   c                  l   ^  \ rS rSrSr\R                  S4           SU 4S jjjrSS jrSr	U =r
$ )	MBConvV   az  Implement the Mobile Inverted Residual Bottleneck Convolution layer.

Args:
    in_channels: The number of input channels.
    out_channels: The number of output channels.
    expansion: The expansion ratio for the hidden dimension.
    kernel_size: The convolution kernel size.
    stride: The stride of the convolution.
    dropout: The dropout rate for the stochastic depth.
        c           	        > [         TU ]  5         [        X-  5      n[        XSUS9U l        [        XfSSSXd5      U l        [        XbS5      U l        [        U5      U l        U" 5       U l	        g )Nr   r*   r>   )
r   r   r   r   conv1conv2conv3r
   	drop_pathr#   )r$   r%   r&   expansion_ratior*   rN   hidden_channelsr+   s          r   r   MBConv.__init__b   sd     	k;<K!
S
OaAc
O1=
!),<r   c                    U R                  XR                  U R                  U R                  U R	                  U5      5      5      5      -   5      $ r   )r#   rN   rM   rL   rK   r$   r   s     r   forwardMBConv.forwardr   s7    xxNN4::djjA6O+PQQRRr   )r#   rK   rL   rM   rN   )r%   r   r&   r   rO   floatr*   r-   rN   rV   r.   r/   r   torch.Tensorr.   rX   r0   r1   r2   r3   r4   r   rD   r   rT   r6   r7   r8   s   @r   rF   rF   V   sd    	  ')gg     	 
 $    
    S Sr   rF   c                  j   ^  \ rS rSrSr\R                  4           SU 4S jjjrSS jrSr	U =r
$ )PatchMergingv   a  Implement the patch merging layer for downsampling in TinyViT.

Args:
    input_resolution: The height and width of the input feature map.
    dim: The number of input channels.
    out_dim: The number of output channels.
    act: The activation function class. Default: nn.GELU.
c           
        > [        US;   S5        [        TU ]	  5         [        U5      U l        [        X#SUS9U l        [        X3SUSX5S9U l        [        X3S5      U l        g )N)r   r=   zstride must be either 1 or 2r   rJ   r>   )r   r*   )	r	   r   r   r   input_resolutionr   rK   rL   rM   )r$   r^   dimout_dimr(   r*   r+   s         r   r   PatchMerging.__init__   s`     	Vv%'EF *+; <C!
C
Ga7b
Ga0
r   c                   UR                   S:X  a,  UR                  SS5      R                  SU R                  5      nU R	                  U R                  U R                  U5      5      5      nUR                  S5      R                  SS5      nU$ )Nr>   r   r=   )ndim	transpose	unflattenr^   rM   rL   rK   flattenrS   s     r   rT   PatchMerging.forward   sn    66Q;Aq!++At/D/DEAJJtzz$**Q-01IIaL""1a(r   )rK   rL   rM   r^   )r^   int | tuple[int, int]r_   r   r`   r   r(   r   r*   r-   r.   r/   rW   rY   r8   s   @r   r[   r[   v   s_     ')gg1/1 1 	1
 1 $1 
1 1 r   r[   c                  z   ^  \ rS rSrSr\R                  SSSS4               S
U 4S jjjrSS jrS	r	U =r
$ )	ConvLayer   a1  Implement a convolutional layer with optional checkpointing and downsample.

Args:
    dim: The number of input channels.
    depth: The number of blocks in the convolutional layer.
    activation: The activation function to use. Default: :class:`nn.GELU`.
    drop_path: The dropout rate for the stochastic depth. Default: 0.0.
    downsample: The downsample module to use. Default: None.
    use_checkpoint: Whether to use checkpointing for memory efficiency. Default: False.
    conv_expand_ratio: The expansion ratio for the hidden dimension. Default: 4.0.
rH   NF      @c                   > [         T	U ]  5         X`l        [        U[        5      (       d  U/U-  n[
        R                  " [        U5       Vs/ s H  n[        XXsXH   5      PM     sn5      U l	        XPl
        g s  snf r   )r   r   use_checkpointr   listr   
ModuleListrangerF   blocks
downsample)
r$   r_   depthr*   rN   rs   rn   conv_expand_ratioir+   s
            r   r   ConvLayer.__init__   st     	, )T**"e+ImmTYZ_T`aT`qVC/Y\JT`a

 %	 bs   A:c                    U R                    H2  nU R                  (       a  [        R                  " X!5      OU" U5      nM4     U R                  b  U R                  U5      nU$ r   rr   rn   r   rs   r$   r   blks      r   rT   ConvLayer.forward   K    ;;C151D1D
%%c-#a&A ??&"Ar   rr   rs   rn   )r_   r   rt   r   r*   r-   rN   float | list[float]rs   Optional[nn.Module]rn   boolru   rV   r.   r/   rW   rY   r8   s   @r   rj   rj      s    
  ')gg),*.$#&%% % $	%
 '% (% % !% 
% %. r   rj   c                  b   ^  \ rS rSrSr\R                  S4           SU 4S jjjrSrU =r	$ )MLP   aP  Implement a multi-layer perceptron (MLP) with optional dropout.

Args:
    in_features: The number of input features.
    hidden_features: The number of hidden features.
    out_features: The number of output features.
    activation: The activation function to use. Default: :class:`nn.GELU`.
    drop: The dropout rate. Default: 0.0.
rH   c                H  > [         TU ]  5         [        R                  " U5      U l        [        R
                  " X5      U l        U" 5       U l        [        R                  " U5      U l	        [        R
                  " X#5      U l
        [        R                  " U5      U l        g r   )r   r   r   	LayerNormnormLinearfc1act1Dropoutdrop1fc2drop2)r$   in_featureshidden_featuresout_featuresr*   dropr+   s         r   r   MLP.__init__   si     	LL-	99[:L	ZZ%
99_;ZZ%
r   )r   r   r   r   r   r   )r   r   r   r   r   r   r*   r-   r   rV   r.   r/   rC   r8   s   @r   r   r      sX     ')gg&& & 	&
 $& & 
& &r   r   c                     ^  \ rS rSrSr   S           S	U 4S jjjr\S
S j5       r\R                  " 5       SSU 4S jjj5       r
SS jrSrU =r$ )	Attention   aj  Implement an attention mechanism with optional relative position encoding.

Args:
    dim: The number of input channels.
    key_dim: The dimension of the key.
    num_heads: The number of attention heads. Default: 8.
    attn_ratio: The ratio of the attention dimension. Default: 4.0.
    resolution: The resolution of the input feature map. Default: (14, 14).
c                b  > [         T	U ]  5         X0l        US-  U l        X l        X#-  U l        [        XB-  5      U l        [        XB-  5      U-  U l        X@l	        U R                  U R
                  S-  -   n[        R                  " U5      U l        [        R                  " X5      U l        [        R                  " U R                  U5      U l        U R!                  U5      u  px[        R"                  " [$        R&                  " X85      5      U l        U R+                  SUSS9  U   S U l        g )Ng      r=   attention_bias_idxsF)
persistent)r   r   	num_headsscalekey_dimnh_kdr   ddh
attn_ratior   r   r   r   qkvprojbuild_attention_bias	Parametertorchzerosattention_biasesregister_bufferab)
r$   r_   r   r   r   
resolutionhindicesattn_offset_sizer+   s
            r   r   Attention.__init__   s     	"d]
(
Z)*j*+i7$GGdjj1n$LL%	99S$IIdggs+	$($=$=j$I! "U[[-U V2GN*.r   c                   U u  p[         R                  " U5      n[         R                  " U5      nUR                  U5      nUR                  U5      nUS S 2S 4   US S S 24   -
  R	                  5       nUS S 2S 4   US S S 24   -
  R	                  5       nXr-  U-   n	[         R
                  " U	SS9u  pUR                  X-  X-  5      nU
R                  5       nX4$ )NT)return_inverse)r   arangerepeat_interleaverepeatabsuniqueviewnumel)r   HWrowscolsrrccdrdckeysunique_keysinverser   r   s                 r   r   Attention.build_attention_bias	  s    ||A||A##A&[[^DkBtQwK',,.DkBtQwK',,.v{$||DF,,quae,&,,.((r   c                   > [         TU ]  U5        U(       a  U R                  b	  S U l        U $ U R                  S S 2U R                  4   U l        U $ r   )r   trainr   r   r   )r$   moder+   s     r   r   Attention.train  sM    dDGG$7$ ?C>S>STUW[WoWoTo>pr   c                   UR                   u  p#nU R                  U5      nU R                  U5      nUR                  X#U R                  S5      R                  SSSS5      nUR                  U R                  U R                  U R                  /SS9u  pgnU R                  (       a  U R                  S S 2U R                  4   OU R                  n	XgR                  SS5      -  U R                  -  U	-   n
U
R                  SS9n
X-  R                  SS5      R!                  X#U R"                  5      nU R%                  U5      nU$ )Nr   r=   r   r>   )r_   )shaper   r   r   r   permutesplitr   r   trainingr   r   r   rd   r   softmaxreshaper   r   )r$   r   BN_r   qkvr   attns              r   rT   Attention.forward  s   ''aIIaLhhqkhhqT^^R088Aq!D))T\\4<<@a)HaEI]]t$$Q(@(@%@AX\X_X_KKB''4::5<|||#X  A&..qTWW=IIaLr   )r   r   r   r   r   r   r   r   r   r   r   r   )   rl   )   r   )r_   r   r   r   r   r   r   rV   r   tuple[int, int]r.   r/   )r   r   r.   ztuple[torch.Tensor, int])T)r   r   r.   r   rW   )r0   r1   r2   r3   r4   r   staticmethodr   r   no_gradr   rT   r6   r7   r8   s   @r   r   r      s     &.// / 	/
 / $/ 
/ /8 ) ) ]]_  
 r   r   c                     ^  \ rS rSrSrSSSSS\R                  4                   S
U 4S jjjrSS jrS	r	U =r
$ )TinyViTBlocki/  ao  Implement a single block of the TinyViT architecture.

This block consists of multi-head self-attention and a feed-forward network
with residual connections.

Args:
    dim: The input dimension size.
    input_resolution: The height and width of the input feature map.
    num_heads: The number of attention heads.
    window_size: The size of the local attention window.
    mlp_ratio: The ratio of MLP hidden dimension to embedding dimension.
    drop: The dropout rate. Default: 0.0.
    drop_path: The stochastic depth rate. Default: 0.0.
    layer_scale_init_value: The initial value for layer scaling. Default: 1e-5.
   rl   rH   r>   c
                J  > [        X-  S:H  S5        [        TU ]	  5         [        U5      U l        X@l        X-  n
[        XUSXD45      U l        [        U5      U l	        [        XUSUS-  U5      U l        [        U[        X-  5      XU5      U l        [        U5      U l        g )Nr   z!dim must be divislbe by num_headsg      ?r   r=   )r	   r   r   r   r^   window_sizer   r   r
   
drop_path1r   
local_convr   r   mlp
drop_path2)r$   r_   r^   r   r   	mlp_ratior   rN   local_conv_sizer*   head_dimr+   s              r   r   TinyViTBlock.__init__@  s     	S_)+NO *+; <&#cYk=WX	"9- ?ARS?SUXYsC0#4H"9-r   c                @   U R                   u  p#UR                  u  pEnUnUR                  XBX65      n[        XR                  5      u  pU R                  UR                  SS5      5      n[        XR                  XU45      nUR                  XEU5      nXpR                  U5      -   nUR                  SS5      R                  XFX#5      nU R                  U5      nUR                  XFU5      R                  SS5      nXR                  U R                  U5      5      -   nU$ )Nr   r=   )r^   r   r   r   r   r   rf   r   r   rd   r   r   r   r   )	r$   r   r   r   r   LCres_xpad_hws	            r   rT   TinyViTBlock.forwardX  s    $$''aFF1$Q(8(89	IIaii1o&q"2"2FFCFF1OOOA&&KK1%%aA1OOAFF1O%%a+,,r   )r   r   r   r^   r   r   r   )r_   r   r^   rh   r   r   r   r   r   rV   r   rV   rN   rV   r   r   r*   r-   r.   r/   rW   rY   r8   s   @r   r   r   /  s    *  &(gg.. 0. 	.
 . . . . . $. 
. .0 r   r   c                     ^  \ rS rSrSrSSSSSS\R                  4                         SU 4S jjjrSS	 jrS
r	U =r
$ )
BasicLayerim  ao  Implement a basic layer of the TinyViT architecture.

This layer consists of a series of TinyViT blocks, each followed by a patch
merging operation.

Args:
    dim: The input dimension size.
    input_resolution: The height and width of the input feature map.
    depth: The number of blocks in the layer.
    num_heads: The number of attention heads.
    window_size: The size of the local attention window.
    mlp_ratio: The ratio of MLP hidden dimension to embedding dimension.
    drop: The dropout rate. Default: 0.0.
    drop_path: The stochastic depth rate. Default: 0.0.
    downsample: The downsample module to use. Default: None.
    use_checkpoint: Whether to use checkpointing for memory efficiency. Default: False.
    local_conv_size: The size of the local convolution kernel. Default: 3.
    activation: The activation function to use. Default: :class:`nn.GELU`.
rl   rH   NFr>   c                  > [         TU ]  5         Xl        [        R                  " [        U5       Vs/ s H/  n[        UUUUUU[        U[        5      (       a  X   OUUU5	      PM1     sn5      U l	        Xl
        g s  snf r   )r   r   rn   r   rp   rq   r   r   ro   rr   rs   )r$   r_   r^   rt   r   r   r   r   rN   rs   rn   r   r*   rv   r+   s                 r   r   BasicLayer.__init__  s     	,mm u &A $$.y$$?$?ILY#
 &
$ %#s   6A<c                    U R                    H2  nU R                  (       a  [        R                  " X!5      OU" U5      nM4     U R                  b  U R                  U5      nU$ r   ry   rz   s      r   rT   BasicLayer.forward  r}   r   r~   )r_   r   r^   rh   rt   r   r   r   r   r   r   rV   r   rV   rN   r   rs   r   rn   r   r   r   r*   r-   r.   r/   rW   rY   r8   s   @r   r   r   m  s    6 ),*.$ &(gg$%$% 0$% 	$%
 $% $% $% $% '$% ($% $% $% $$% 
$% $%L r   r   c                     ^  \ rS rSrSrSSSSSSS	S
SSSS
S\R                  S4                               SU 4S jjjrSS jr\	SSS jj5       r
SrU =r$ )TinyViTi  a  TinyViT model, as described in https://arxiv.org/abs/2207.10666.

Args:
    img_size: Size of input image.
    in_chans: Number of input image's channels.
    num_classes: Number of output classes.
    embed_dims: List of embedding dimensions.
    depths: List of block count for each downsampling stage
    num_heads: List of attention heads used in self-attention for each downsampling stage.
    window_sizes: List of self-attention's window size for each downsampling stage.
    mlp_ratio: Ratio of MLP dimension to embedding dimension in self-attention.
    drop_rate: Dropout rate.
    drop_path_rate: Stochastic depth rate.
    use_checkpoint: Whether to use activation checkpointing to trade compute for memory.
    mbconv_expand_ratio: Expansion ratio used in MBConv block.
    local_conv_size: Kernel size of convolution used in TinyViTBlock
    activation: activation function.
    mobile_same: Whether to use modifications for MobileSAM.

   r>   i  )`        i   r=   r=      r=   )r>   r         r   r   r   r   rl   rH   Fc                8  > [         T U ]  5         Xl        Xl        U   U(       ad  / SQn[        R
                  " [        R                  " US   SSSS9[        S5      [        R                  " SSSSSSS9[        S5      5      U l        O/ SQnS U l        [        X$S	   U5      U l
        US
-  n[        R                  " S	U
[        U5      5       Vs/ s H  nUR                  5       PM     nn[        U5      n/ n[!        [#        XEXgU5      5       H  u  nu  nnnnnU[%        US-   [        U5      S-
  5         nUUS-
  :  a  ['        UUUUU5      OS nUUU[        US U 5      [        US US-    5       UUUS.nUS	:X  a  [)        SSU0UD6nO[+        SUUUUU	US.UD6nUR-                  U5        UU-  nM     [        R
                  " U6 U l        UU l        [        R2                  " US   5      U l        [        R6                  " US   U5      U l        g s  snf )N)r=   r=   r   r   r      r   F)r   r>   )r=   r=   r=   r   r      )r_   rt   rN   rs   rn   r*   ru   )r^   r   r   r   r   r    )r   r   img_size
mobile_samr   r?   r   r   neckr:   patch_embedr   linspacesumitemlen	enumeratezipminr[   rj   r   appendlayers	feat_sizer   	norm_headr   head)!r$   r  in_chansnum_classes
embed_dimsdepthsr   window_sizesr   	drop_ratedrop_path_ratern   mbconv_expand_ratior   r*   r  stridesr^   r   dprn_layersr  i_layerrA   rt   num_heads_ir   r(   r`   rs   kwargslayerr+   s!                                   r   r   TinyViT.__init__  s?   & 	 $ #G		*R.#qu=C 		#sAq!%8C 	DI #GDI%h1zJ#q= "'>3v;!OP!OAqvvx!OP v;MV
IWEN
IGIi[& !Wq[#j/A2E!FGG hl* -y'6:V  ! VHW%5!6VMgPQk=R9ST("0(&F !|!R4GR6R" %5) +'"$3  MM% '?N
@ mmV,) jn5IIjnk:	[ Qs   Hc                h   U R                  U5      nU R                  U5      nU R                  (       aM  UR                  SU R                  U R                  45      R                  SSSS5      nU R                  U5      nU$ UR                  S5      nU R                  U R                  U5      5      nU$ )zUClassify images if ``mobile_sam=False``, produce feature maps if ``mobile_sam=True``.r   r   r>   r=   )
r	  r  r  re   r  r   r  meanr  r  rS   s     r   rT   TinyViT.forward  s    QKKN??A?@HHAqRSTA		!A
  q	A		$..+,Ar   c                X    [        U S;   S5        [        [        [        S.U    " U40 UD6$ )a  Create a TinyViT model from pre-defined variants.

Args:
    variant: TinyViT variant. Possible values: ``'5m'``, ``'11m'``, ``'21m'``.
    pretrained: whether to use pre-trained weights. Possible values: ``False``, ``True``, ``'in22k'``,
        ``'in1k'``. For TinyViT-21M (``variant='21m'``), ``'in1k_384'``, ``'in1k_512'`` are also available.
    **kwargs: other keyword arguments that will be passed to :class:`TinyViT`.

.. note::
    When ``img_size`` is different from the pre-trained size, bicubic interpolation will be performed on
    attention biases. When using ``pretrained=True``, ImageNet-1k checkpoint (``'in1k'``) is used.
    For feature extraction or fine-tuning, ImageNet-22k checkpoint (``'in22k'``) is preferred.

)5m11m21mz+Only variant 5m, 11m, and 21m are supported)r	   _tiny_vit_5m_tiny_vit_11m_tiny_vit_21m)variant
pretrainedr#  s      r   from_configTinyViT.from_config.  s4      	W 446cd"=OPWXYcngmnnr   )r  r  r  r  r  r  r  r	  ) r  r   r  r   r  r   r  Sequence[int]r  r4  r   r4  r  r4  r   rV   r  rV   r  rV   rn   r   r  rV   r   r   r*   r-   r  r   r.   r/   rW   F)r0  strr1  
bool | strr#  r   r.   r   )r0   r1   r2   r3   r4   r   rD   r   rT   r   r2  r6   r7   r8   s   @r   r   r     s   . $7 ,#1&3 #$%( &(gg #W;W; W; 	W;
 "W; W; !W; $W; W; W; W; W; #W; W;  $!W;" #W;$ 
%W; W;r o or   r   c                j   U R                  5       n[        R                  R                  U5      nSU;   a  US   nUR	                  5        Vs/ s H  nSU;   d  M  UPM     nnU H  nX4   R
                  u  pgX$   R
                  u  p[        Xh:H  SU SU 35        Xy:w  d  M>  [        US-  5      n
[        U	S-  5      nX4   R                  SXjU
5      n[        R                  " XU4SS9nUR                  X5      X4'   M     US	   R
                  S
   U R                  R                  :w  aq  Sn[        R                  " USS9  [        R                  " U R                  R                   5      US	'   [        R                  " U R                  R"                  5      US'   U R%                  U5        U $ s  snf )Nmodelr   zFail to load z/. Pre-trained checkpoint should have num_heads=g      ?r   bicubic)sizer   zhead.weightr   zaNumber of classes does not match pre-trained checkpoint's. Resetting classification head to zeros)
stacklevelz	head.bias)
state_dictr   hubload_state_dict_from_urlr   r   r	   r   r   Finterpolater  r   warningswarn
zeros_likeweightr   load_state_dict)r9  urlmodel_state_dictr=  r   ab_keysn_heads1L1n_heads2L2S1S2r   msgs                 r   _load_pretrainedrQ  B  s   '')33C8J *(
 %//+G+Q/AQ/Fq+GG!}**'*00X)]1#=lmulv+wx8RWBRWB)}11!X2F }}-=HS\],11(?JM  - &&q)UZZ-D-DDqca($)$4$4UZZ5F5F$G
=!"'"2"25::??"C
;	*%L) Hs   
F0F0c           	     z    [        S/ SQ/ SQ/ SQ/ SQSS.UD6nU (       a  U SL a  Sn S	S
S.U    n[        X#5      nU$ )N)@         i@  r   )r=   r     
   r  rH   r  r  r   r  r  Tin1kzchttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_5m_22k_distill.pthzghttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_5m_22kto1k_distill.pthin22krY  r  r   rQ  r1  r#  r9  rG  s       r   r-  r-  c  sg     &" E J v}	

  !,Lr   c           	     z    [        S/ SQ/ SQ/ SQ/ SQSS.UD6nU (       a  U SL a  Sn S	S
S.U    n[        X#5      nU$ )N)rS  rT  r  i  r   )r=   r  r   r   r  g?rX  TrY  zdhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_11m_22k_distill.pthzhhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_11m_22kto1k_distill.pthrZ  r  r\  r]  s       r   r.  r.  |  sg     &" E J w~	

  !,Lr   c           	         [        S/ SQ/ SQ/ SQ/ SQSS.UD6nU (       a>  U SL a$  Sn UR                  S	S
5      nUS:  a  Sn US:  a  Sn SSSSS.U    n[        X$5      nU$ )N)r   r   r   i@  r   )r>   r   r      r  g?rX  TrY  r  r   r   in1k_384i   in1k_512zdhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_21m_22k_distill.pthzhhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_21m_22kto1k_distill.pthzlhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_21m_22kto1k_384_distill.pthzlhttps://github.com/wkcn/TinyViT-model-zoo/releases/download/checkpoints/tiny_vit_21m_22kto1k_512_distill.pth)r[  rY  ra  rb  r  )r   getrQ  )r1  r#  r9  r  rG  s        r   r/  r/    s     & " E Jzz*c2H3'
3'
 w~ G G
  !,Lr   )r   rh   r.   r   )r9  r   rG  r6  r.   r   r5  )r1  r7  r#  r   r.   r   )&
__future__r   rB  typingr   r   r   r   torch.nn.functionalr   
functionalr@  torch.utilsr   kornia.core.checkr	   kornia.models.commonr
   r   r   r   r   r?   r   r:   ModulerF   r[   rj   r   r   r   r   r   rQ  r-  r.  r/  r  r   r   <module>rl     s   * #  * *     " * \ \/ R]]  8
 
&SRYY S@299 B)		 )X&"-- &>J		 JZ;299 ;|@ @FOobii OodB22r   