
    
3j                        S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
JrJrJr  SSKrSSKJr  SSKJs  Jr  SSKJrJr  SSKJrJrJrJrJrJrJr  SSKJ r   SS	K!J"r"  SS
K#J$r$  SSK%J&r&J'r'  SSK(J)r)J*r*  S/r+\RX                  " \-5      r.S\R^                  S\R^                  4S jr0S\R^                  S\R^                  4S jr1S\R^                  S\\2\24   S\R^                  4S jr3\$S\R^                  S\\2\24   S\\2\24   S\R^                  4S j5       r4 " S S\Rj                  5      r6 " S S\Rj                  5      r7 " S S\Rj                  5      r8 " S S\Rj                  5      r9 " S S \Rj                  5      r: " S! S\Rj                  5      r;SZS#\Rj                  S$\<S%\=4S& jjr>S' r?S[S( jr@S\S)\<S\\<\4   4S* jjrA\)" 0 S+\A" S"S,S-S.S/9_S0\A" S"S1S2S39_S4\A" S5S6S1S2S79_S8\A" S"S,S-S.S/9_S9\A" S5S:S1S2S79_S;\A" S5S<S1S2S79_S=\A" S"S>S-S?S/9_S@\A" S"S,S-S.S/9_SA\A" S"S1S2S39_SB\A" S"S1S2S39_SC\A" S"S,S-S.S/9_SD\A" S"S1S2S39_SE\A" S"S,S-S.S/9_SF\A" S"S1S2S39_SG\A" S"S,S-S.S/9_SH\A" S"S1S2S39_5      rB\*S[SI\=S\;4SJ jj5       rC\*S[SI\=S\;4SK jj5       rD\*S[SI\=S\;4SL jj5       rE\*S[SI\=S\;4SM jj5       rF\*S[SI\=S\;4SN jj5       rG\*S[SI\=S\;4SO jj5       rH\*S[SI\=S\;4SP jj5       rI\*S[SI\=S\;4SQ jj5       rJ\*S[SI\=S\;4SR jj5       rK\*S[SI\=S\;4SS jj5       rL\*S[SI\=S\;4ST jj5       rM\*S[SI\=S\;4SU jj5       rN\*S[SI\=S\;4SV jj5       rO\*S[SI\=S\;4SW jj5       rP\*S[SI\=S\;4SX jj5       rQ\*S[SI\=S\;4SY jj5       rRg)]a  Swin Transformer V2

A PyTorch impl of : `Swin Transformer V2: Scaling Up Capacity and Resolution`
    - https://arxiv.org/pdf/2111.09883

Code adapted from https://github.com/ChristophReich1996/Swin-Transformer-V2, original copyright/license info below

This implementation is experimental and subject to change in manners that will break weight compat:
* Size of the pos embed MLP are not spelled out in paper in terms of dim, fixed for all models? vary with num_heads?
  * currently dim is fixed, I feel it may make sense to scale with num_heads (dim per head)
* The specifics of the memory saving 'sequential attention' are not detailed, Christoph Reich has an impl at
  GitHub link above. It needs further investigation as throughput vs mem tradeoff doesn't appear beneficial.
* num_heads per stage is not detailed for Huge and Giant model variants
* 'Giant' is 3B params in paper but ~2.6B here despite matching paper dim + block counts
* experiments are ongoing wrt to 'main branch' norm layer use and weight init scheme

Noteworthy additions over official Swin v1:
* MLP relative position embedding is looking promising and adapts to different image/window sizes
* This impl has been designed to allow easy change of image size with matching window size changes
* Non-square image size and window size are supported

Modifications and additions for timm hacked together by / Copyright 2022, Ross Wightman
    N)partial)AnyCallableDictListOptionalTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathcalculate_drop_path_ratesMlpClassifierHead	to_2tuple_assertndgrid   )build_model_with_cfg)feature_take_indices)register_notrace_function)named_apply
checkpoint)generate_default_cfgsregister_modelSwinTransformerV2Crxreturnc                 *    U R                  SSSS5      $ )z>Permutes a tensor from the shape (B, C, H, W) to (B, H, W, C).r         r   permuter   s    \/home/wildlama/miniconda3/lib/python3.13/site-packages/timm/models/swin_transformer_v2_cr.pybchw_to_bhwcr(   4       99Q1a      c                 *    U R                  SSSS5      $ )z>Permutes a tensor from the shape (B, H, W, C) to (B, C, H, W).r   r#   r   r"   r$   r&   s    r'   bhwc_to_bchwr,   9   r)   r*   window_sizec                     U R                   u  p#pEU R                  X#US   -  US   XAS   -  US   U5      n U R                  SSSSSS5      R                  5       R                  SUS   US   U5      nU$ )zPartition into non-overlapping windows.

Args:
    x: Input tensor of shape (B, H, W, C).
    window_size: Window size (height, width).

Returns:
    Windows tensor of shape (num_windows*B, window_size[0], window_size[1], C).
r   r   r#   r"         shapeviewr%   
contiguous)r   r-   BHWCwindowss          r'   window_partitionr;   >   s     JA!	q{1~%{1~qN7JKXYN\]^Aii1aAq)446;;BAP[\]P^`abGNr*   r:   img_sizec                     Uu  p4U R                   S   nU R                  SX1S   -  XAS   -  US   US   U5      nUR                  SSSSSS5      R                  5       R                  SX4U5      nU$ )a  Merge windows back to feature map.

Args:
    windows: Windows tensor of shape (num_windows * B, window_size[0], window_size[1], C).
    window_size: Window size (height, width).
    img_size: Image size (height, width).

Returns:
    Feature map tensor of shape (B, H, W, C).
r1   r   r   r#   r"   r/   r0   r2   )r:   r-   r<   r7   r8   r9   r   s          r'   window_reverser>   N   s     DAbAR!n,aq>.A;q>S^_`SacdeA			!Q1a#..055b!BAHr*   c                   ,  ^  \ rS rSrSr      SS\S\S\\\4   S\S\S	\S
\SS4U 4S jjjr	SS jr
SS jrSS jrS\\\4   SS4S jrS\R                  4S jrSS\R                  S\\R                     S\R                  4S jjrSS jrSrU =r$ )WindowMultiHeadAttentiona   a  This class implements window-based Multi-Head-Attention with log-spaced continuous position bias.

Args:
    dim (int): Number of input features
    window_size (int): Window size
    num_heads (int): Number of attention heads
    drop_attn (float): Dropout rate of attention map
    drop_proj (float): Dropout rate after projection
    meta_hidden_dim (int): Number of hidden features in the two layer MLP meta network
    sequential_attn (bool): If true sequential self-attention is performed
Ndim	num_headsr-   	drop_attn	drop_projmeta_hidden_dimsequential_attnr    c
                   > XS.n
[         TU ]  5         X-  S:X  d   S5       eXl        [        U5      U l        X l        Xpl        [        R                  " SXS-  SS.U
D6U l	        [        R                  " U5      U l        [        R                  " SXSS.U
D6U l        [        R                  " U5      U l        [         SUU[        R                  SS	.U
D6U l        [        R"                  " [$        R&                  " U40 U
D65      U l        U R                  u  pU R+                  S
[$        R&                  " X-  U-  U-  S40 U
D6SS9  U R-                  5         g )Ndevicedtyper   z`The number of input features (in_features) are not divisible by the number of heads (num_heads).r#   Tin_featuresout_featuresbiasr"   )g      ?        )hidden_featuresrN   	act_layerdroprelative_coordinates_logF
persistent )r"   )super__init__rM   r   r-   rC   rG   nnLinearqkvDropout	attn_dropproj	proj_dropr   ReLUmeta_mlp	Parametertorchemptylogit_scaleregister_bufferreset_parameters)selfrB   rC   r-   rD   rE   rF   rG   rJ   rK   ddwin_hwin_w	__class__s                r'   rY   !WindowMultiHeadAttention.__init__n   sO    /!# 	on	o# #,5k,B'%499T7TQSTI.IIQ#dQbQ	I.
+"gg
 
 <<I(D(DE ''&KK-5q?B? 	 	
 	r*   c                     [         R                  R                  U R                  [        R
                  " S5      5        U R                  5         g)"Initialize parameters and buffers.
   N)rZ   init	constant_rf   mathlog_init_buffersri   s    r'   rh   )WindowMultiHeadAttention.reset_parameters   s0    
$**DHHRL9r*   c                 $    U R                  5         g).Compute and fill non-persistent buffer values.N)"_make_pair_wise_relative_positionsrw   s    r'   rv   &WindowMultiHeadAttention._init_buffers   s    //1r*   c                    U R                   R                  n[        R                  " [	        [        R
                  " U R                  S   U[        R                  S9[        R
                  " U R                  S   U[        R                  S95      5      R                  S5      nUSS2SS2S4   USS2SSS24   -
  nUR                  SSS5      R                  SS5      R                  5       n[        R                  " U5      [        R                  " SUR                  5       -   5      -  nU R                  R!                  UR#                  U R                   R$                  5      5        g)zMInitialize the pair-wise relative positions to compute the positional biases.r   rI   r   Nr"   r1         ?)rf   rJ   rd   stackr   aranger-   float32flattenr%   reshapefloatsignru   absrT   copy_torK   )ri   rJ   coordinatesrelative_coordinatesrT   s        r'   r{   ;WindowMultiHeadAttention._make_pair_wise_relative_positions   s"   !!((kk&LL))!,V5==QLL))!,V5==Q#
  71: 	  +1a:6QaZ9PP3;;Aq!DLLRQRSYY[#(::.B#Ceii&**,,G. $. %%++,D,G,GHXHXH^H^,_`r*   c                 f    [        U5      nXR                  :w  a  Xl        U R                  5         gg)zjUpdate window size and regenerate relative position coordinates.

Args:
    window_size: New window size.
N)r   r-   r{   )ri   r-   s     r'   set_window_size(WindowMultiHeadAttention.set_window_size   s1      ,****335 +r*   c                     U R                   S   U R                   S   -  nU R                  U R                  5      nUR                  SS5      R	                  U R
                  X5      nUR                  S5      nU$ )zCompute the relative positional encodings.

Returns:
    Relative positional encodings of shape (1, num_heads, window_size**2, window_size**2).
r   r   )r-   rb   rT   	transposer   rC   	unsqueeze)ri   window_arearelative_position_biass      r'   _relative_positional_encodings7WindowMultiHeadAttention._relative_positional_encodings   sy     &&q)D,<,<Q,??!%t/L/L!M!7!A!A!Q!G!O!ONNK"
 "8!A!A!!D%%r*   r   maskc                    UR                   u  p4nU R                  U5      R                  X4SU R                  XPR                  -  5      R	                  SSSSS5      nUR                  S5      u  pxn	[        R                  " USS9[        R                  " USS9R                  SS5      -  n
[        R                  " U R                  R                  SU R                  SS5      [        R                  " S	5      S
9R                  5       nX-  n
XR!                  5       -   n
Ubm  UR                   S   nU
R                  X<-  XR                  XD5      n
XR#                  S5      R#                  S5      -   n
U
R                  SU R                  XD5      n
U
R%                  SS9n
U R'                  U
5      n
X-  R                  SS5      R                  X4S5      nU R)                  U5      nU R+                  U5      nU$ )zForward pass of window multi-head self-attention.

Args:
    x: Input tensor of shape (B * windows, N, C).
    mask: Attention mask for the shift case.

Returns:
    Output tensor of shape (B * windows, N, C).
r#   r"   r   r   r/   r1   )rB   g      Y@)max)r3   r\   r4   rC   r%   unbindF	normalizer   rd   clamprf   r   rt   ru   expr   r   softmaxr^   r_   r`   )ri   r   r   BwLr9   r\   querykeyvalueattnrf   num_wins                r'   forward WindowMultiHeadAttention.forward   s    77qhhqkrann9LMUUVWYZ\]_`bcdJJqME Er*Q[["-E-O-OPRTV-WWkk$"2"2":":1dnnaQR"SY]YaYabkYlmqqs!99;;::a=G99R]G^^QJD..+55a88D99R6D|||#~~d#\$$Q*222"=IIaLNN1r*   c                 $    U R                  5         gz"Initialize non-persistent buffers.Nrv   rw   s    r'   init_non_persistent_buffers4WindowMultiHeadAttention.init_non_persistent_buffers       r*   )
r^   rM   rf   rb   rC   r_   r`   r\   rG   r-   )rP   rP     FNNr    NN)__name__
__module____qualname____firstlineno____doc__intr	   r   boolrY   rh   rv   r{   r   rd   Tensorr   r   r   r   __static_attributes____classcell__rm   s   @r'   r@   r@   a   s    
"  #"#&$). .  .  sCx	. 
 .  .  !.  ".  
.  . `
2a	65c? 	6t 	6& &! !Xell-C !u|| !F r*   r@   c                    *  ^  \ rS rSrSrSSSSSSSSSS\R                  SS4S	\S
\S\\\4   S\\\4   S\\\4   S\	S\	S\
S\\
   S\
S\
S\
S\	S\	S\\R                     4U 4S jjjrS'S jrS'S jrS\\\4   S\\\\4   \\\4   4   4S jr   S(S\\R&                     S\\R(                     S \\R*                     S\\R&                     4S! jjrS\\\4   S\\\4   SS4S" jrS# rS\R&                  S\R&                  4S$ jrS'S% jrS&rU =r$ ))SwinTransformerV2CrBlock   a  This class implements the Swin transformer block.

Args:
    dim (int): Number of input channels
    num_heads (int): Number of attention heads to be utilized
    feat_size (Tuple[int, int]): Input resolution
    window_size (Tuple[int, int]): Window size to be utilized
    shift_size (int): Shifting size to be used
    mlp_ratio (int): Ratio of the hidden dimension in the FFN to the input channels
    proj_drop (float): Dropout in input mapping
    drop_attn (float): Dropout rate of attention map
    drop_path (float): Dropout in main path
    extra_norm (bool): Insert extra norm on 'main' branch if True
    sequential_attn (bool): If true sequential self-attention is performed
    norm_layer (Type[nn.Module]): Type of normalization layer to be utilized
)r   r   F      @r   rP   NrB   rC   	feat_sizer-   
shift_sizealways_partitiondynamic_mask	mlp_ratioinit_valuesr`   rD   	drop_path
extra_normrG   
norm_layerc           
         > UUS.n[         TU ]  5         Xl        X0l        [	        U5      U l        X`l        Xpl        U R                  U5      u  U l	        U l
        U R                  S   U R                  S   -  U l        Xl        [        SUUU R                  UU
US.UD6U l        U" U40 UD6U l        US:  a	  [!        US9O["        R$                  " 5       U l        [)        SU[+        X-  5      U
US.UD6U l        U" U40 UD6U l        US:  a	  [!        US9O["        R$                  " 5       U l        U(       a	  U" U40 UD6O["        R$                  " 5       U l        U R5                  SS S	S
9  U R7                  5         g )NrI   r   r   )rB   rC   r-   rD   rE   rG   rP   )	drop_prob)rM   rQ   rS   rN   	attn_maskFrU   rW   )rX   rY   rB   r   r   target_shift_sizer   r   _calc_window_shiftr-   r   r   r   r@   r   norm1r   rZ   Identity
drop_path1r   r   mlpnorm2
drop_path2norm3rg   rh   )ri   rB   rC   r   r-   r   r   r   r   r   r`   rD   r   r   rG   r   rJ   rK   rj   rm   s                      r'   rY   !SwinTransformerV2CrBlock.__init__  s   ( /*32;J2G 0(,0,C,CK,P)$/++A.1A1A!1DD,7 - 
((+
 
	  *r*
;Ds?(Y7PRP[P[P]  
0	

 
  *r*
;Ds?(Y7PRP[P[P] /9Z*r*bkkm
 	[$5A 	r*   r    c                 8   U R                  5         U R                  b}  [        R                  R	                  U R
                  R                  U R                  5        [        R                  R	                  U R                  R                  U R                  5        gg)rp   N)rv   r   rZ   rr   rs   r   weightr   rw   s    r'   rh   )SwinTransformerV2CrBlock.reset_parametersH  sg    'GGdjj//1A1ABGGdjj//1A1AB (r*   c                     U R                   (       da  U R                  R                  R                  nU R                  R                  R                  nU R                  XS9nU R                  SUSS9  gg)rz   rI   r   FrU   N)r   r   r   rJ   rK   get_attn_maskrg   )ri   rJ   rK   r   s       r'   rv   &SwinTransformerV2CrBlock._init_buffersP  sb      ZZ&&--FJJ%%++E**&*FI  iE J	 !r*   target_window_sizec                    [        U5      nU R                  n[        U5      (       a  US   S-  US   S-  4nU R                  (       a  X4$ [	        U R
                  U5       VVs/ s H  u  p4X4::  a  UOUPM     nnn[	        U R
                  XR5       VVVs/ s H  u  p4ocU::  a  SOUPM     nnnn[        U5      [        U5      4$ s  snnf s  snnnf )Nr   r"   r   )r   r   anyr   zipr   tuple)ri   r   r   fwr-   sr   s           r'   r   +SwinTransformerV2CrBlock._calc_window_shiftX  s     ''9: 22 !!!3A!6!!;=OPQ=RVW=W X  %8869$..J\6]^6]daAFq)6]^8;DNNK8kl8kWQ16aq(8k
l[!5#444 _ls   %CCr   rJ   rK   c           	         [        U R                  5      (       Ga  Uc&  [        R                  " S/U R                  QSP7X#S9nOH[        R                  " SUR
                  S   UR
                  S   S4UR                  UR                  S9nSnSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S44 Hn  nSU R                  S   * 4U R                  S   * U R                  S   * 4U R                  S   * S44 H$  nXTSS2US   US   2US   US   2SS24'   US-  nM&     Mp     [        X@R                  5      nUR                  SU R                  5      nUR                  S5      UR                  S5      -
  n	U	R                  U	S:g  [        S5      5      R                  U	S:H  [        S5      5      n	U	$ Sn	U	$ )	z7Method generates the attention mask used in shift case.Nr   rI   r"   r   r1   g      YrP   )r   r   rd   zerosr   r3   rJ   rK   r-   r;   r4   r   r   masked_fillr   )
ri   r   rJ   rK   img_maskcnthr   mask_windowsr   s
             r'   r   &SwinTransformerV2CrBlock.get_attn_maski  s    ty ;;'>DNN'>A'>v[ ;;1771:qwwqz1'Eahh^_^e^efC))!,,-&&q))DOOA,>+>?ooa(($/ T--a001**1--0B/BC//!,,d3A
 <?Q!QqT	1Q4!9a781HC ,H6F6FGL',,R1A1ABL$..q1L4J4J14MMI!--i1neFmLXXYbfgYginoristI  Ir*   c                    Xl         U R                  [        U5      5      u  U l        U l        U R                  S   U R                  S   -  U l        U R                  R                  U R                  5        U R                  b  U R                  R                  OSnU R                  b  U R                  R                  OSnU R                  SU R                  (       a  SOU R                  X4S9SS9  g)zMethod updates the image resolution to be processed and window size and so the pair-wise relative positions.

Args:
    feat_size (Tuple[int, int]): New input resolution
    window_size (int): New window size
r   r   Nr   rI   FrU   )r   r   r   r-   r   r   r   r   r   rJ   rK   rg   r   r   )ri   r   r-   rJ   rK   s        r'   set_input_size'SwinTransformerV2CrBlock.set_input_size  s     +4,0,C,CIkDZ,[)$/++A.1A1A!1DD		!!$"2"23*...*D&&$(,(B$$%%D4+=+=V+=+Y 	 	
r*   c           	         UR                   u  p#pEU R                  u  pg[        U R                  5      nU(       a  [        R                  " X* U* 4SS9nU R
                  S   X0R
                  S   -  -
  U R
                  S   -  n	U R
                  S   X@R
                  S   -  -
  U R
                  S   -  n
[        R                  R                  R                  USSSU
SU	45      nUR                   u  pp[        XR
                  5      nUR                  SU R
                  S   U R
                  S   -  U5      n[        U SS5      (       a  U R                  U5      nOU R                  nU R                  XS9nUR                  SU R
                  S   U R
                  S   U5      n[        UU R
                  X45      nUS S 2S U2S U2S S 24   R!                  5       nU(       a  [        R                  " XU4SS9nU$ )	N)r   r"   )shiftsdimsr   r   r1   r   F)r   )r3   r   r   rd   rollr-   rZ   
functionalpadr;   r4   getattrr   r   r   r>   r5   )ri   r   r6   r7   r8   r9   shswdo_shiftpad_hpad_w_HpWp	x_windowsr   attn_windowss                    r'   _shifted_window_attn-SwinTransformerV2CrBlock._shifted_window_attn  s   WW
a T__- 

1cB3Zf=A!!!$q+;+;A+>'>>$BRBRSTBUU!!!$q+;+;A+>'>>$BRBRSTBUUHH##A1a5'ABwwr %Q(8(89	NN2t'7'7':T=M=Ma=P'PRST	 4//**1-IIyyy; $((T-=-=a-@$BRBRSTBUWXY<)9)9B8Da!RaRlO&&(  

1"XF;Ar*   c                 T   XR                  U R                  U R                  U5      5      5      -   nUR                  u  p#pEUR	                  USU5      nXR                  U R                  U R                  U5      5      5      -   nU R                  U5      nUR	                  X#XE5      nU$ )zForward pass of Swin Transformer V2 block.

Args:
    x: Input tensor of shape [B, C, H, W].

Returns:
    Output tensor of shape [B, C, H, W].
r1   )	r   r   r  r3   r   r   r   r   r   )ri   r   r6   r7   r8   r9   s         r'   r    SwinTransformerV2CrBlock.forward  s     

4+D+DQ+G HIIWW
aIIaQ

488A; 788JJqMIIaA!r*   c                 $    U R                  5         gr   r   rw   s    r'   r   4SwinTransformerV2CrBlock.init_non_persistent_buffers  r   r*   )r   r   rB   r   r   r   r   r   r   r   r   r   r   r   r   r-   r   )NNN)r   r   r   r   r   rZ   	LayerNormr   r	   r   r   r   r
   ModulerY   rh   rv   r   rd   r   rJ   rK   r   r   r  r   r   r   r   r   s   @r'   r   r      s   . +1%*!&"+,"""$$)*,,,%? ?  ?  S#X	? 
 sCx?  c3h?  #?  ?  ?  "%?  ?  ?  ?  ?  "?   RYY!?  ? BCK5 %c3h5 
uS#Xc3h/	05& )--1+/	!%! U\\*! EKK(	!
 
%,,	!F
c3h 
eCQTHo 
Z^ 
((T %,, & r*   r   c                      ^  \ rS rSrSr\R                  SS4S\S\\R                     SS4U 4S jjjr
S\R                  S\R                  4S	 jrS
rU =r$ )PatchMergingi  ztPatch merging layer.

This class implements the patch merging as a strided convolution with a normalization before.
NrB   r   r    c                    > X4S.n[         TU ]  5         U" SU-  40 UD6U l        [        R                  " SSU-  SU-  SS.UD6U l        g)zInitialize patch merging layer.

Args:
    dim: Number of input channels.
    norm_layer: Type of normalization layer to be utilized.
rI   r/   r"   FrL   NrW   )rX   rY   normrZ   r[   	reduction)ri   rB   r   rJ   rK   rj   rm   s         r'   rY   PatchMerging.__init__  sS     /q3w-"-	_q3wQWSX_\^_r*   r   c                 \   UR                   u  p#pESSSUS-  SUS-  4n[        R                  R                  X5      nUR                   u  pspGUR	                  X#S-  SUS-  SU5      R                  SSSSSS5      R                  S5      nU R                  U5      nU R                  U5      nU$ )zForward pass of patch merging.

Args:
    x: Input tensor of shape [B, C, H, W].

Returns:
    Output tensor of shape [B, 2 * C, H // 2, W // 2].
r   r"   r   r#   r/   r0   )	r3   rZ   r   r   r   r%   r   r  r  )ri   r   r6   r7   r8   r9   
pad_valuesr   s           r'   r   PatchMerging.forward  s     WW
aAq1uaQ/
MMa,WW
aIIaaAFAq199!Q1aKSSTUVIIaLNN1r*   )r  r  )r   r   r   r   r   rZ   r  r   r
   r	  rY   rd   r   r   r   r   r   s   @r'   r  r    sj     +-,,`` RYY` 
` `$ %,,  r*   r  c                      ^  \ rS rSrSr        SS\\\\\4   4   S\\\\\4   4   S\S\S\\	\
R                        S	\S
S4U 4S jjjrS\\\4   S
S4S jrS\R                   S
\R                   4S jrSrU =r$ )
PatchEmbedi  z2D Image to Patch Embedding.Nr<   
patch_sizein_chans	embed_dimr   strict_img_sizer    c	                   > XxS.n	[         T
U ]  5         [        U5      n[        U5      nXl        X l        US   US   -  US   US   -  4U l        U R
                  S   U R
                  S   -  U l        X`l        [        R                  " X44X"S.U	D6U l
        U(       a  U" U40 U	D6U l        g[        R                  " 5       U l        g)a   Initialize patch embedding.

Args:
    img_size: Input image size.
    patch_size: Patch size.
    in_chans: Number of input channels.
    embed_dim: Embedding dimension.
    norm_layer: Normalization layer.
    strict_img_size: Enforce strict image size.
rI   r   r   )kernel_sizestrideN)rX   rY   r   r<   r  	grid_sizenum_patchesr  rZ   Conv2dr_   r   r  )ri   r<   r  r  r  r   r  rJ   rK   rj   rm   s             r'   rY   PatchEmbed.__init__  s    * /X&z*
 $"1+A6zRS}8TU>>!,t~~a/@@.IIhczc`bc	3=Jy/B/	2;;=	r*   c                     [        U5      nXR                  :w  aZ  Xl        US   U R                  S   -  US   U R                  S   -  4U l        U R                  S   U R                  S   -  U l        gg)z>Update input image size.

Args:
    img_size: New image size.
r   r   N)r   r<   r  r  r  )ri   r<   s     r'   r   PatchEmbed.set_input_size3  su     X&}}$$M&qkT__Q-??!PTP_P_`aPbAbcDN#~~a04>>!3DDD %r*   r   c                    UR                   u  p#pEU R                  (       a^  [        X@R                  S   :H  SU SU R                  S    S35        [        XPR                  S   :H  SU SU R                  S    S35        U R	                  U5      nU R                  UR                  SSSS5      5      R                  SSSS5      nU$ )	zForward pass of patch embedding.

Args:
    x: Input tensor of shape [B, C, H, W].

Returns:
    Output tensor of shape [B, C', H', W'].
r   zInput image height (z) doesn't match model (z).r   zInput image width (r"   r#   )r3   r  r   r<   r_   r  r%   )ri   r   r6   r9   r7   r8   s         r'   r   PatchEmbed.forward?  s     WW
aAq))-A!D[\`\i\ijk\l[mmo+pqAq))-@CZ[_[h[hij[kZlln+opIIaLIIaii1a+,44Q1a@r*   )r  r<   r  r  r  r_   r  )      r#   i   NTNN)r   r   r   r   r   r   r   r	   r   r
   rZ   r	  r   rY   r   rd   r   r   r   r   r   s   @r'   r  r    s    & 5868 48$( QCsCx01 Q c5c?23 Q 	 Q
  Q !bii1 Q " Q 
 Q  QD
EuS#X 
E4 
E %,,  r*   r  c            #       P  ^  \ rS rSrSrSSSSSSS\R                  SSSSS4S\S	\S
\S\S\	\\4   S\	\\4   S\S\S\
S\\
   S\
S\
S\\\
   \
4   S\\R                     S\S\S\4"U 4S jjjr SS\	\\4   S\S\\   4S jjrS\R&                  S\R&                  4S jrSrU =r$ ) SwinTransformerV2CrStageiQ  a  This class implements a stage of the Swin transformer including multiple layers.

Args:
    embed_dim (int): Number of input channels
    depth (int): Depth of the stage (number of layers)
    downscale (bool): If true input is downsampled (see Fig. 3 or V1 paper)
    feat_size (Tuple[int, int]): input feature map size (H, W)
    num_heads (int): Number of attention heads to be utilized
    window_size (int): Window size to be utilized
    mlp_ratio (int): Ratio of the hidden dimension in the FFN to the input channels
    proj_drop (float): Dropout in input mapping
    drop_attn (float): Dropout rate of attention map
    drop_path (float): Dropout in main path
    norm_layer (Type[nn.Module]): Type of normalization layer to be utilized. Default: nn.LayerNorm
    extra_norm_period (int): Insert extra norm layer on main branch every N (period) blocks
    extra_norm_stage (bool): End each stage with an extra norm layer in main branch
    sequential_attn (bool): If true sequential self-attention is performed
Fr   rP   r   Nr  depth	downscalerC   r   r-   r   r   r   r   r`   rD   r   r   extra_norm_periodextra_norm_stagerG   c                 `  >^^^ UUS.n[         TU ]  5         X0l        SU l        U(       a  US   S-  US   S-  4OUU l        U(       a  [        U4SU0UD6U l        US-  nO[        R                  " 5       U l        UUU4S jn[        R                  " [        T5       VVs/ s Hp  n[        S	UUU R                  UUU[        U Vs/ s H  nUS-  S:X  a  SOUS-  PM     sn5      U	U
UU[        U[        5      (       a  UU   OUU" U5      UUS.UD6PMr     snn6 U l        g s  snf s  snnf )
NrI   Fr   r"   r   r   c                 L   > U S-   nT(       a
  UT-  S:X  a  gT(       a  UT:H  $ S$ )Nr   r   TFrW   )indexir(  r*  r+  s     r'   _extra_norm6SwinTransformerV2CrStage.__init__.<locals>._extra_norm  s0    	A Q):%:a%?!11:<u<r*   )rB   rC   r   r-   r   r   r   r   r   r`   rD   r   r   rG   r   rW   )rX   rY   r)  grad_checkpointingr   r  
downsamplerZ   r   
Sequentialranger   r   
isinstancelistblocks)ri   r  r(  r)  rC   r   r-   r   r   r   r   r`   rD   r   r   r*  r+  rG   rJ   rK   rj   r0  r.  r   rm   s     `            ``       r'   rY   !SwinTransformerV2CrStage.__init__e  sE   , /((-T]9Q<1+<ila>O*Pcl*9RRrRDO!AI kkmDO	= mm& u'&'& &% % #..'!1) {![{!q(8!qAv"E{![\#'##.8D.I.I)E*y&u- /%  !$ &'&' 
 "\&'s   #%D*
D%&"8D*
%D*
c                     U R                   (       a  US   S-  US   S-  4OUU l        U R                   H  nUR                  U R                  US9  M     g)zUpdates the resolution to utilize and the window size and so the pair-wise relative positions.

Args:
    window_size (int): New window size
    feat_size (Tuple[int, int]): New input resolution
r   r"   r   )r   r-   N)r)  r   r8  r   )ri   r   r-   r   blocks        r'   r   'SwinTransformerV2CrStage.set_input_size  sX     DH>>)A,!+Yq\Q->?W`[[E  ..' !  !r*   r   r    c                    [        U5      nU R                  U5      nU R                   HL  nU R                  (       a0  [        R
                  R                  5       (       d  [        X!5      nMD  U" U5      nMN     [        U5      nU$ )zForward pass.
Args:
    x (torch.Tensor): Input tensor of the shape [B, C, H, W] or [B, L, C]
Returns:
    output (torch.Tensor): Output tensor of the shape [B, 2 * C, H // 2, W // 2]
)	r(   r3  r8  r2  rd   jitis_scriptingr   r,   )ri   r   r;  s      r'   r    SwinTransformerV2CrStage.forward  sg     OOOA[[E&&uyy/E/E/G/Gu(!H ! Or*   )r8  r3  r)  r   r2  r   )r   r   r   r   r   rZ   r  r   r   r	   r   r   r   r   r
   r	  rY   r   rd   r   r   r   r   r   s   @r'   r'  r'  Q  sd   6 &+!&"+.""36*,,,%&%*$))<
<
 <
 	<

 <
 S#X<
 sCx<
 #<
 <
 <
 "%<
 <
 <
 T%[%/0<
 RYY<
   #!<
" ##<
$ "%<
 <
D 04	S#X  'tn	& %,,  r*   r'  c            3         ^  \ rS rSrSrSSSSSSS	S
SSSSSSSSS\R                  SSSSSSS4S\\\4   S\S\	\   S\S\
S\
S\S\S\S\\S4   S\\S4   S\S \	\   S!\S"\S#\S$\S%\\R                     S&\S'\
S(\
S)\S*\S+\S,S42U 4S- jjjrSDS.\
S,S4S/ jjr    SES\	\\\4      S\	\\\4      S\S\	\
   S,S4
S0 jjr\R(                  R*                  SFS1 j5       r\R(                  R*                  SDS2 j5       r\R(                  R+                  5       S,\R                  4S3 j5       rSGS\S)\	\   S,S4S4 jjr     SHS5\R4                  S6\	\\\\   4      S7\
S8\
S9\S:\
S,\\\R4                     \\R4                  \\R4                     4   4   4S; jjr   SIS6\\\\   4   S<\
S=\
4S> jjrS5\R4                  S,\R4                  4S? jrSFS@\
4SA jjr S5\R4                  S,\R4                  4SB jr!SCr"U =r#$ )Jr   i  aS  Swin Transformer V2
    A PyTorch impl of : `Swin Transformer V2: Scaling Up Capacity and Resolution`  -
      https://arxiv.org/pdf/2111.09883

Args:
    img_size: Input resolution.
    window_size: Window size. If None, grid_size // window_div
    window_ratio: Window size to patch grid ratio.
    patch_size: Patch size.
    in_chans: Number of input channels.
    depths: Depth of the stage (number of layers).
    num_heads: Number of attention heads to be utilized.
    embed_dim: Patch embedding dimension.
    num_classes: Number of output classes.
    mlp_ratio:  Ratio of the hidden dimension in the FFN to the input channels.
    drop_rate: Dropout rate.
    proj_drop_rate: Projection dropout rate.
    attn_drop_rate: Dropout rate of attention map.
    drop_path_rate: Stochastic depth rate.
    norm_layer: Type of normalization layer to be utilized.
    extra_norm_period: Insert extra norm layer on main branch every N (period) blocks in stage
    extra_norm_stage: End each stage with an extra norm layer in main branch
    sequential_attn: If true sequential self-attention is performed.
)r$  r$  r/   N   FTr#     `   r"   r"      r"   r#   rF        r   rP   r   avgresetr<   r  r-   window_ratior   r  r  num_classesr  depths.rC   r   r   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rater   r*  r+  rG   global_poolweight_initkwargsr    c                 &  > [         T$U ]  5         UUS.n[        U5      nXl        Xpl        X l        Xl        [        U	S[        U
5      S-
  -  -  5      =U l	        U l
        / U l        [        S!UUUU	UUS.UD6U l        U R                  R                  nUc%  [        U Vs/ s H  nUU-  PM
     sn5      U l        O[        U5      U l        [#        UU
SS9n/ nU	n Sn![%        ['        X5      5       H  u  n"u  n#nU[)        S!0 SU _SU#_S	U"S
:g  _SUS
   U!-  US   U!-  4_SU_SU R                   _SU_SU(       + _SU_SU_SU_SU_SUU"   _SU_SU=(       d    U"S-   [        U
5      :H  _SU_SU_UD6/-  nU"S
:w  a
  U S-  n U!S-  n!U =R                  [+        U SU!-  SU" 3S9/-  sl        M     [,        R.                  " U6 U l        [3        U R                  U4UUS.UD6U l        US:X  a  SOUU l        US:w  a  U R9                  SS 9  g g s  snf )"NrI   r"   r   )r<   r  r  r  r   r  T)	stagewiser  r(  r)  r   r   rC   r-   r   r   r   r   r`   rD   r   r*  r+  rG   r   r/   zstages.)num_chsr  module)	pool_typerO  skiprK  Fneeds_resetrW   )rX   rY   r   rM  r  r  r<   r   lennum_featureshead_hidden_sizefeature_infor  patch_embedr  r   r-   r   	enumerater   r'  dictrZ   r4  stagesr   headweight_init_modeinit_weights)%ri   r<   r  r-   rL  r   r  r  rM  r  rN  rC   r   r   rO  rP  rQ  rR  r   r*  r+  rG   rS  rT  rJ   rK   rU  rj   r  r   dprre  in_dimin_scale	stage_idxr(  rm   s%                                       r'   rY   SwinTransformerV2Cr.__init__  s   : 	/X& +%))147	A#f+XY/DZ8Z4[[D1% 
!!+
 
 $$..	$%KAa<&7%KLD(5D'$O-6s67M-N)I)y/   $q. %Q<83Yq\X5MN	
 $ !,, "2 "10 $ ( ) ) i. #4 "2!Si!mF5S  !0!" &%  F( A~!A$vXX_`i_jVk"l!mm1 .O2 mmV,"
 "	

 
	 ,7&+@k& %0 ![ &Ls   !Hr]  c                     U R                   S:X  a  S nU(       a  [        X 5        gg[        [        [        US9U 5        g)zInitialize model weights.

Args:
    needs_reset: If True, call reset_parameters() on modules (default for after to_empty()).
        If False, skip reset_parameters() (for __init__ where modules already self-initialized).
rK  c                 H    [        U S5      (       a  U R                  5         g g )Nrh   )hasattrrh   )rY  names     r'   _reset0SwinTransformerV2Cr.init_weights.<locals>._resetN  s!    6#566++- 7r*   r\  N)rg  r   r   init_weights_swin)ri   r]  rr  s      r'   rh   SwinTransformerV2Cr.init_weightsE  s=       G+. F)   1{KTRr*   c                 X   Ub/  U R                   R                  US9  U R                   R                  nUc!  Ub  [        W Vs/ s H  ofU-  PM	     sn5      n[	        U R
                  5       H6  u  pxS[        US-
  S5      -  n	UR                  WS   U	-  US   U	-  4UUS9  M8     gs  snf )a  Updates the image resolution, window size and so the pair-wise relative positions.

Args:
    img_size (Optional[Tuple[int, int]]): New input resolution, if None current resolution is used
    window_size (Optional[int]): New window size, if None based on new_img_size // window_div
    window_ratio (int): divisor for calculating window size from patch grid size
    always_partition: always partition / shift windows even if feat size is < window
N)r<   r"   r   r   )r   r-   r   )rb  r   r  r   rc  re  r   )
ri   r<   r-   rL  r   r  r   r.  stagestage_scales
             r'   r   "SwinTransformerV2Cr.set_input_sizeV  s     ++X+>((22I<#;I FIql!2I FGK%dkk2LEs519a00K  $Q<;6	!8ST'!1 !  3 !Gs   B'c                 0    [        SU(       a  SS9$ SS/S9$ )Nz^patch_embedz^stages\.(\d+))z^stages\.(\d+).downsample)r   )z^stages\.(\d+)\.\w+\.(\d+)N)stemr8  )rd  )ri   coarses     r'   group_matcher!SwinTransformerV2Cr.group_matchert  s/     (.$
 	
 555
 	
r*   c                 6    U R                    H	  nXl        M     g r   )re  r2  )ri   enabler   s      r'   set_grad_checkpointing*SwinTransformerV2Cr.set_grad_checkpointing~  s    A#)  r*   c                 .    U R                   R                  $ )zpMethod returns the classification head of the model.
Returns:
    head (nn.Module): Current classification head
)rf  fcrw   s    r'   get_classifier"SwinTransformerV2Cr.get_classifier  s     yy||r*   c                 F    Xl         U R                  R                  X5        g)zMethod results the classification head

Args:
    num_classes (int): Number of classes to be predicted
    global_pool (str): Unused
N)rM  rf  rK  )ri   rM  rS  s      r'   reset_classifier$SwinTransformerV2Cr.reset_classifier  s     '		1r*   r   indicesr  
stop_early
output_fmtintermediates_onlyc                    US;   d   S5       e/ n[        [        U R                  5      U5      u  pU R                  U5      n[        R
                  R                  5       (       d  U(       d  U R                  n
OU R                  SU	S-    n
[        U
5       H%  u  pU" U5      nX;   d  M  UR                  U5        M'     U(       a  U$ X4$ )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    norm: Apply norm layer to compatible intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
Returns:

)NCHWzOutput shape must be NCHW.Nr   )	r   r^  re  rb  rd   r>  r?  rc  append)ri   r   r  r  r  r  r  intermediatestake_indices	max_indexre  r/  rw  s                r'   forward_intermediates)SwinTransformerV2Cr.forward_intermediates  s    * Y&D(DD&"6s4;;7G"Q Q99!!##:[[F[[)a-0F!&)HAaA $$Q' *
   r*   
prune_norm
prune_headc                     [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  U R                  SS5        U$ )z?Prune layers not required for specified intermediates.
        Nr   r    )r   r^  re  r  )ri   r  r  r  r  r  s         r'   prune_intermediate_layers-SwinTransformerV2Cr.prune_intermediate_layers  sK     #7s4;;7G"Qkk.9q=1!!!R(r*   c                 J    U R                  U5      nU R                  U5      nU$ r   )rb  re  ri   r   s     r'   forward_features$SwinTransformerV2Cr.forward_features  s$    QKKNr*   
pre_logitsc                 R    U(       a  U R                  USS9$ U R                  U5      $ )NT)r  )rf  )ri   r   r  s      r'   forward_head SwinTransformerV2Cr.forward_head  s$    0:tyyty,L		!Lr*   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  r  r  s     r'   r   SwinTransformerV2Cr.forward  s'    !!!$a r*   )ra  rf  r`  r<   r  rM  r_  rb  r  re  rg  r-   )T)NNrB  NFr   )NFFr  F)r   FT)$r   r   r   r   r   rZ   r  r	   r   r   r   r   r
   r	  strr   rY   rh  r   rd   r>  ignorer}  r  r  r  r   r   r   r  r  r  r  r   r   r   r   s   @r'   r   r     s   6 )3)- !%*$(#&2)7"+-"$'$'$'*,,,%&%*$)$&5`1CHo`1 `1 "#	`1
 `1 #`1 "`1 `1 `1 `1 #s(O`1 S#X`1 `1 "%`1 `1  "!`1" "#`1$ "%`1& RYY'`1(  #)`1* #+`1, "-`1. /`10 1`16 7`18 
9`1 `1DS S S& 3759 !/3uS#X/ "%S/2 	
 'tn 
< YY
 
 YY* * YY		  2C 2hsm 2W[ 2 8<$$',( ||(  eCcN34(  	( 
 (  (  !%(  
tELL!5tELL7I)I#JJ	K( X ./$#	3S	>*  	%,, 5<< 
M$ M %,,  r*   r  rY  rq  r]  c                    [        U [        R                  5      (       Ga  SU;   a  [        R                  " S[        U R                  R                  S   S-  U R                  R                  S   -   5      -  5      n[        R                  R                  U R                  U* U5        OYSU;   a*  [        R                  R                  U R                  5        O)[        R                  R                  U R                  5        U R                  b*  [        R                  R                  U R                  5        g g [        U S5      (       a  U R                  5         g U(       a#  [        U S5      (       a  U R                  5         g g g )	Nr\   g      @r   r#   r   rf  rh  rh   )r6  rZ   r[   rt   sqrtr   r   r3   rr   uniform_zeros_xavier_uniform_rO   rp  rh  rh   )rY  rq  r]  vals       r'   rt  rt    s   &"))$$D=))Bv}}':':1'='BV]]EXEXYZE['[!\\]CGGV]]SD#6t^GGNN6==)GG##FMM2;;"GGNN6;;' #		(	(	);<<! =r*   c                 "   U R                  SU 5      n U R                  SU 5      n SU ;   a  U $ 0 nU R                  5        HL  u  p4SU;   a+  [        R                  " SU-  5      nUR	                  SS5      nUR	                  SS5      nXBU'   MN     U$ )	zIconvert patch embedding weight from manual patchify + linear proj to convmodel
state_dictzhead.fc.weighttaur   rf   zhead.zhead.fc.)getitemsrd   ru   replace)r  r  out_dictkvs        r'   checkpoint_filter_fnr    s    4Jj9J:%H  "A:		!a% A		%/AIIgz* # Or*   c           	          [        S [        UR                  SS5      5       5       5      nUR                  SU5      n[	        [
        X4[        [        SUS9S.UD6nU$ )Nc              3   *   #    U  H	  u  pUv   M     g 7fr   rW   ).0r/  r   s      r'   	<genexpr>1_create_swin_transformer_v2_cr.<locals>.<genexpr>   s     \.[da.[s   rN  )r   r   r   r   out_indicesT)flatten_sequentialr  )pretrained_filter_fnfeature_cfg)r   rc  r  popr   r   r  rd  )variant
pretrainedrU  default_out_indicesr  r  s         r'   _create_swin_transformer_v2_crr    sf    \i

8\8Z.[\\**],?@K W1DkJ 	E Lr*   urlc                 4    U SSSSSS[         [        SSS	S
.UE$ )zCreate a default configuration dictionary.

Args:
    url: Model weights URL.
    **kwargs: Additional configuration parameters.

Returns:
    Configuration dictionary.
rC  r#   r$  r$  )   r  ?bicubicTzpatch_embed.projzhead.fcz
apache-2.0)r  rM  
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanstd
first_conv
classifierlicenser   )r  rU  s     r'   _cfgr    s<     #" %#(  r*   zswinv2_cr_tiny_384.untrained)r#   r   r   r~   )rH  rH  )r  r  r  r  zswinv2_cr_tiny_224.untrainedr  r  )r  r  r  zswinv2_cr_tiny_ns_224.sw_in1kztimm/z{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-swinv2/swin_v2_cr_tiny_ns_224-ba8166c6.pth)	hf_hub_idr  r  r  zswinv2_cr_small_384.untrainedzswinv2_cr_small_224.sw_in1kzyhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-swinv2/swin_v2_cr_small_224-0813c165.pthzswinv2_cr_small_ns_224.sw_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-swinv2/swin_v2_cr_small_ns_224_iv-2ce90f8e.pthz swinv2_cr_small_ns_256.untrained)r#      r  )rB  rB  zswinv2_cr_base_384.untrainedzswinv2_cr_base_224.untrainedzswinv2_cr_base_ns_224.untrainedzswinv2_cr_large_384.untrainedzswinv2_cr_large_224.untrainedzswinv2_cr_huge_384.untrainedzswinv2_cr_huge_224.untrainedzswinv2_cr_giant_384.untrainedzswinv2_cr_giant_224.untrainedr  c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-T V2 CR @ 384x384, trained ImageNet-1k.rD  rE  rG  r  rN  rC   r  )swinv2_cr_tiny_384rd  r  r  rU  
model_argss      r'   r  r  Q  9      J
 *t:tY]^hYslrYsttr*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-T V2 CR @ 224x224, trained ImageNet-1k.rD  rE  rG  r  r  )swinv2_cr_tiny_224r  r  s      r'   r  r  \  r  r*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )zSwin-T V2 CR @ 224x224, trained ImageNet-1k w/ extra stage norms.

** Experimental, may make default if results are improved. **
rD  rE  rG  Tr  rN  rC   r+  r  )swinv2_cr_tiny_ns_224r  r  s      r'   r  r  g  s<      	J *wjw\`ak\vou\vwwr*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-S V2 CR @ 384x384, trained ImageNet-1k.rD  r"   r"      r"   rG  r  r  )swinv2_cr_small_384r  r  s      r'   r  r  v  9      J
 *uJuZ^_iZtmsZtuur*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ ),Swin-S V2 CR @ 224x224, trained ImageNet-1k.rD  r  rG  r  r  )swinv2_cr_small_224r  r  s      r'   r  r    r  r*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )r  rD  r  rG  Tr  r  )swinv2_cr_small_ns_224r  r  s      r'   r  r    <      	J *xzx]abl]wpv]wxxr*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-S V2 CR @ 256x256, trained ImageNet-1k.rD  r  rG  Tr  r  )swinv2_cr_small_ns_256r  r  s      r'   r  r    r  r*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-B V2 CR @ 384x384, trained ImageNet-1k.   r  r/   rB  r%      r  r  )swinv2_cr_base_384r  r  s      r'   r  r    9      J
 *t:tY]^hYslrYsttr*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ ),Swin-B V2 CR @ 224x224, trained ImageNet-1k.r  r  r  r  r  )swinv2_cr_base_224r  r  s      r'   r  r    r  r*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )r  r  r  r  Tr  r  )swinv2_cr_base_ns_224r  r  s      r'   r  r    s<      	J *wjw\`ak\vou\vwwr*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-L V2 CR @ 384x384, trained ImageNet-1k.   r  rF  rH  rI  0   r  r  )swinv2_cr_large_384r  r  s      r'   r  r    9     !J
 *uJuZ^_iZtmsZtuur*   c           	      F    [        SSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-L V2 CR @ 224x224, trained ImageNet-1k.r   r  r  r  r  )swinv2_cr_large_224r  r  s      r'   r  r    r  r*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-H V2 CR @ 384x384, trained ImageNet-1k.`  r  )      ,   X   rF  r  rN  rC   r*  r  )swinv2_cr_huge_384r  r  s      r'   r  r    s<     "	J *t:tY]^hYslrYsttr*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-H V2 CR @ 224x224, trained ImageNet-1k.r  r  )rB  r%  r  @   rF  r  r  )swinv2_cr_huge_224r  r  s      r'   r  r    s<     !	J *t:tY]^hYslrYsttr*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-G V2 CR @ 384x384, trained ImageNet-1k.   r"   r"   *   r"   r%  r  r  r  rF  r  r  )swinv2_cr_giant_384r  r  s      r'   r  r    <     #	J *uJuZ^_iZtmsZtuur*   c           	      H    [        SSSSS9n[        SSU 0[        U40 UD6D6$ )z,Swin-G V2 CR @ 224x224, trained ImageNet-1k.r  r  r  rF  r  r  )swinv2_cr_giant_224r  r  s      r'   r  r     r  r*   )r  Tr  )r  )Sr   loggingrt   	functoolsr   typingr   r   r   r   r   r	   r
   r   rd   torch.nnrZ   torch.nn.functionalr   r   	timm.datar   r   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   r   	_registryr   r   __all__	getLoggerr   _loggerr   r(   r,   r   r;   r>   r	  r@   r   r  r  r'  r   r  r   rt  r  r  r  default_cfgsr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rW   r*   r'   <module>r+     s  :    J J J     A l l l * + 3 0 < 
!


H
%!ELL !U\\ !
!ELL !U\\ !
 5c? u||   ELL uS#X RWX[]`X`Ra fkfrfr  $Qryy Qhjryy jZ*299 *Z? ?Duryy upO")) Od"bii "s "d "&"
c T#s(^ 6 % '&"D=3(%L'& #D=3%8'&
 $T J 3&0'& $T=3(&L'& "4 H 3$0'& %d N 3'0'&& '=3&)J''&* #D=3(%L+'&. #D=3%8/'&2 &t=3(83'&6 $T=3(&L7'&: $T=3&8;'&> #D=3(%L?'&B #D=3%8C'&F $T=3(&LG'&J $T=3&8K'& 'T u4 u>Q u u u4 u>Q u u xd xAT x x vD v?R v v vD v?R v v yt yBU y y yt yBU y y u4 u>Q u u u4 u>Q u u xd xAT x x vD v?R v v vD v?R v v u4 u>Q u u u4 u>Q u u vD v?R v v vD v?R v vr*   