
    
3j]-                    <   S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
JrJrJrJrJrJrJrJr   SSK	Jr  SSKrSSKJr  SSKJs  Jr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$  SS	K%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=  S
SK>J?r?  S
SK@JArA  S
SKBJCrCJDrDJErEJFrF  S
SKGJHrHJIrIJJrJ  S/rK\R                  " \M5      rN\&\&\'S.rO        GSS\<S\PS\PS\QS\QS\QS\QS\RS\RS\\\R                        S\PS\R                  4S jjrT " S S \R                  5      rU " S! S"\R                  5      rV " S# S$\R                  5      rW " S% S&\R                  5      rX " S' S(\R                  5      rY   GSS)\R                  S*\[S+\PS,\Q4S- jjr\ " S. S\R                  5      r]GSS0\R                  S1\[S2\QSS4S3 jjr^   GSS0\R                  S1\[S4\RS2\QSS4
S5 jjr_GSS0\R                  S1\[S2\QSS4S6 jjr`GSS0\R                  S1\[S2\QSS4S7 jjraGSS8\[S4\RS2\QS\4S9 jjrb    GSS;\R                  S<\R                  S+\PS=\\P\P4   S>\[S?\QS\R                  4S@ jjrc\R                  " 5       GSSA\]SB\[SC\[SD\QSS4
SE jj5       re GSSF\\[\R                  4   SA\]SC\[S\\[\R                  4   4SG jjrfSF\\[\R                  4   SA\]S\\[\R                  4   4SH jrgSF\\[\R                  4   SA\]S\\[\R                  4   4SI jrhSF\i4SJ jrj   GSSF\\[\R                  4   SA\]SK\QS>\[S?\QS\\[\R                  4   4SL jjrkGSSM\[S\\[\
4   4SN jjrl0 SO\l" SPSQ9_SR\l" SPSQ9_SS\l" STSPSSU9_SV\l" SWSPSSXSYSZ9_S[\l" S\SPSSU9_S]\l" S^SPSSXSYSZ9_S_\l" S`SPSSU9_Sa\l" SbSPSSXSYSZ9_Sc\l" SdSPSSU9_Se\l" SfSPSSXSYSZ9_Sg\l" ShSPSSU9_Si\l" SjSPSSXSYSZ9_Sk\l" SlSPSSU9_Sm\l" SnSPSSU9_So\l" SpSPSSXSYSZ9_Sq\l" SrSPSs9_St\l" SuSPSXSYSv9_0 Sw\l" SxSPSXSYSv9_Sy\l" SzSPSSU9_S{\l" S|SPSSXSYSZ9_S}\l" S~SPSSU9_S\l" SSPSSXSYSZ9_S\l" SSPSSU9_S\l" SSPSSXSYSZ9_S\l" S/S9_S\l" S/S9_S\l" S/S9_S\l" S/S9_S\l" SPSS9_S\l" SPSS9_S\l" SPSS9_S\l" SPSS9_S\l" SPSS9_S\l" SSPSSS9_E0 S\l" SSPSSS9_S\l" SSPSSS9_S\l" SSPSSS9_S\l" SSPSSS9_S\l" SSPSSS9_S\l" SSPSSS9_S\l" SSSPS9_S\l" SSSPS9_S\l" SSP\\ SS9_S\l" SSP\\ SS9_S\l" SSP\\ SS9_S\l" SSP\\ SS9_S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_E0 S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_S\l" SSPS\\ SSSYS9_S\l" SSPSSSSSS9_S\l" SSPSSSSS9_S\l" SSPSs9_S\l" SPSSSS9_S\l" SPSSS9_S\l" SPSXSSS9_S\l" SSS9_S\l" 5       _S\l" SP\#\$S9_S\l" SP\#\$SYSXS9_S\l" SP\#\$SYSS9_S\l" SP\#\$SS9_S\l" SP\#\$SYSXSS9_S\l" SP\!\"SYS9_E0 S\l" SP\!\"SYSSS9_S\l" SP\#\$SYS9_S\l" SP\#\$SYSSS9_S\l" \#\$S9_S\l" SP\#\$SSXSS9_S\l" SP\#\$SS9_S\l" SP\#\$SSXSS9_S\l" SP\#\$SYS9_S\l" SP\#\$SYSSS9_S\l" SP\#\$S9_S\l" SP\#\$SYS9_S\l" SP\#\$SYSXSS9_S\l" SP\!\"SYS9_S\l" SP\!\"SYSSS9_S\l" SP\#\$SYS9_S\l" S/\#\$SYSSS9_S\l" SP\#\$S9_E0 S\l" SP\#\$S9_S\l" SP\#\$SYSXSS9_S\l" SP\#\$SYS9_S\l" SP\#\$SS9_S\l" SP\!\"SYSGS 9_GS\l" SP\#\$SYSGS 9_GS\l" SP\#\$SS9_GS\l" SP\#\$SYSGS 9_GS\l" SP\#\$GSS9_GS\l" SP\#\$SYGSGS 9_GS\l" SP\!\"SYGSGS 9_GS	\l" SP\#\$SYGS
GS 9_GS\l" SP\#\$SYGS
GS 9_GS\l" SP\#\$SYGSGS 9_GS\l" SPGSGS\#\$GSGS9_GS\l" SPGS\#\$SYGSGS9_GS\l" SPGS\#\$SSYGSGS9_E0 GS\l" SPGS\#\$SYGSGS9_GS\l" SP\#\$SYGSGS 9_GS\l" SP\#\$SYSGSGS9_GS\l" SP\#\$SYGSGS 9_GS\l" SP\#\$SYGSGS 9_GS\l" SPGS\#\$SYGSGS9_GS\l" SPGS\#\$SYGSGS9_GS \l" SPGSGS\#\$SYGSGS!9_GS"\l" SPGSGS\#\$SYGS
GS!9_GS#\l" SP\#\$GSGSSYGS$GS
GS%9_GS&\l" SPGS'GS\#\$SYGS
GS!9_GS(\l" SPGS'\#\$GS$SYSGS
GS)9_GS*\l" SPGS'\#\$SYGSGS9_GS+\l" SPGS'\#\$GS$SYSGSGS)9_GS,\l" SPGS'GS\#\$SYGSGS!9_GS-\l" SPGS'GS\#\$SYGSGS!9_GS.\l" SPGS'GS\#\$SYGSGS!9_E0 GS/\l" SPGS'GS\#\$SYGS
GS!9_GS0\l" SPGS'\#\$SYGS
GS9_GS1\l" SPGS'GS\#\$SYGSGS!9_GS2\l" SPGS'GS\#\$SYGSGS!9_GS3\l" SPGS'GS\#\$SYGSGS!9_GS4\l" SPGS'GS\#\$SYGSGS!9_GS5\l" SPGS\#\$GSGS69_GS7\l" SPGS\#\$GSGS69_GS8\l" SPGS\#\$SYGSGS99_GS:\l" SPGS\#\$SYSGSGS;9_GS<\l" SPGS\#\$SYGS=GS>9_GS?\l" S/SSGS@9_GSA\l" S/SSGS@9_GSB\l" S/S9_GSC\l" S/S9_GSD\l" S/S9_GSE\l" SPGS\#\$GSFSYGSG9_E0 GSH\l" SPGS\#\$SSYSGSI9_GSJ\l" SPGS\#\$GSFSYGSG9_GSK\l" SPGS\#\$SSYSGSI9_GSL\l" GSMSSPSSGSN9_GSO\l" GSPSSPSSGSN9_GSQ\l" GSRSSPSSGSN9_GSS\l" GSTSSPSSGSN9_GSU\l" GSVSSPSSGSN9_GSW\l" GSXSSPSSGSN9_GSY\l" GSZSSPSSSGS[9_GS\\l" GS]SSPSSSGS[9_GS^\l" GS_SSPSSGSN9_GS`\l" GSaSSPSSGSN9_GSb\l" GScSSPSSGSN9_GSd\l" GSeSSPSSSGS[9_GSf\l" GSgSSPSSSGS[9_GSh\l" S/S9_E0 GSi\l" S/S9_GSj\l" S/S9_GSk\l" GSlSPGS'\\ SGSm9_GSn\l" GSoSPGS'\\ SGSm9_GSp\l" GSqSPGS'\\ SGSm9_GSr\l" GSsGS'\\ SGSt9_GSu\l" GSvGS'\\ SGSt9_GSw\l" GSxGS'SSY\\ SGSy9_GSz\l" GS{GS'\\ SGSt9_GS|\l" SPSSGS}9_GS~\l" SPSS9_GS\l" SPSS9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPSXSGS}9_E0 GS\l" SPGSSGS}9_GS\l" SPGSSGS}9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPSXSGS}9_GS\l" SPGSSGS}9_GS\l" SPSS9_GS\l" SPSS9_GS\l" SPGS$SGS}9_GS\l" SPGS$SGS}9_GS\l" SPSXSGS}9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPGSSGS}9_GS\l" SPSSGS}9_E0 GS\l" SPSXSGS}9_GS\l" SPSSGS}9_GS\l" SPSS9_GS\l" SPSS9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPSXSGS}9_GS\l" SPGSSGS}9_GS\l" SPGSSGS}9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPSXSGS}9_GS\l" SPGSSGS}9_GS\l" SPSS9_E0 GS\l" SPSS9_GS\l" SPSS9_GS\l" SPSS9_GS\l" SPSS9_GS\l" SPSS9_GS\l" SPGS$SGS}9_GS\l" SPGS$SYSS9_GS\l" SPSXSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPSSYSS9_GS\l" SPGSSYSS9_E0 GS\l" SPGSSYSS9_GS\l" SPGSSYSS9_GS\l" SPGSSYSS9_GS\l" SPGSSYSS9_GS\l" SPSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPGSSGS}9_GS\l" SPSSGS}9_GS\l" SPSXSGS}9_GS\l" SPGS$SYSS9_GS\l" SPGS$SYSS9_GS\l" SPGS\#\$GSGS9_GS\l" SPGS\#\$GSGS9_GS\l" SPGS\#\$GSGS9_GS\l" SPGS\#\$GSGS9_GS\l" SPSSS9_E0 GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSSGS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSSGS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSSGS9_GS\l" SPSSSGS9_E0 GS\l" SPSXSYS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSS9_GS\l" SPSSSGS9_GS\l" SPSSSGS9_GS\l" SPSXSYS9_GS\l" SGS9_GS\l" SPSSS9_GS\l" SPSSSGS9_GS\l" SPSXSYS9_GS\l" SGS9_GS\l" SPSSYS9_GS\l" SPSSSYGS9_GS\l" SPSXSYS9_GS\l" SPSSYSS9_E0 GS\l" SPGS\\ SSYSGS9_GS\l" SP\\ SSYSGS9_GS\l" SP\#\$GSSYSGS9_GS\l" SP\#\$GSSYSGS9_GS\l" SP\#\$GSSYSGS9_GS\l" SP\#\$GSSYSGS9_GS\l" SP\#\$GSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS \l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SP\#\$GSSSYSGS9_GS\l" SPGSSS9_E\l" SPGSSS9\l" SPGSSS9\l" GSSS9\l" SP\\ SYS9\l" SP\\ SYS9\l" SP\\ SYS9\l" SP\\ SYS9\l" S/\\ SYGS9\l" S/S\\ SYGS9\l" SP\\ SYSGS 9\l" SP\\ SYSGS 9\l" SP\\ SYSGS 9\l" SP\\ SYSGS 9GS.Erm\mR                  5        V Vs/ s H1  u  pUR                  GSS:5      (       d  M  GS	UGS   S   ;   d  M/  U PM3     snn rp\p HB  rq\R                  " \m\q   5      rs\sGS
   SP:X  a	  SP\q-   \sGS
'   \s\m\qR                  GSGS5      '   MD     \H" \m5      rm\R                  R                  GSGS5      R                  5       GS:H  rw  GSGS\[GS\QGS\\Q   S\\]GS4   4GS jjrx\IGSGS\QS\]4GS jj5       ry\IGSGS\QS\]4GS jj5       rz\IGSGS\QS\]4GS jj5       r{\IGSGS\QS\]4GS jj5       r|\IGSGS\QS\]4GS jj5       r}\IGSGS\QS\]4GS jj5       r~\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS  jj5       r\IGSGS\QS\]4GS! jj5       r\IGSGS\QS\]4GS" jj5       r\IGSGS\QS\]4GS# jj5       r\IGSGS\QS\]4GS$ jj5       r\IGSGS\QS\]4GS% jj5       r\IGSGS\QS\]4GS& jj5       r\IGSGS\QS\]4GS' jj5       r\IGSGS\QS\]4GS( jj5       r\IGSGS\QS\]4GS) jj5       r\IGSGS\QS\]4GS* jj5       r\IGSGS\QS\]4GS+ jj5       r\IGSGS\QS\]4GS, jj5       r\IGSGS\QS\]4GS- jj5       r\IGSGS\QS\]4GS. jj5       r\IGSGS\QS\]4GS/ jj5       r\IGSGS\QS\]4GS0 jj5       r\IGSGS\QS\]4GS1 jj5       r\IGSGS\QS\]4GS2 jj5       r\IGSGS\QS\]4GS3 jj5       r\IGSGS\QS\]4GS4 jj5       r\IGSGS\QS\]4GS5 jj5       r\IGSGS\QS\]4GS6 jj5       r\IGSGS\QS\]4GS7 jj5       r\IGSGS\QS\]4GS8 jj5       r\IGSGS\QS\]4GS9 jj5       r\IGSGS\QS\]4GS: jj5       r\IGSGS\QS\]4GS; jj5       r\IGSGS\QS\]4GS< jj5       r\IGSGS\QS\]4GS= jj5       r\IGSGS\QS\]4GS> jj5       r\IGSGS\QS\]4GS? jj5       r\IGSGS\QS\]4GS@ jj5       r\IGSGS\QS\]4GSA jj5       r\IGSGS\QS\]4GSB jj5       r\IGSGS\QS\]4GSC jj5       r\IGSGS\QS\]4GSD jj5       r\IGSGS\QS\]4GSE jj5       r\IGSGS\QS\]4GSF jj5       r\IGSGS\QS\]4GSG jj5       r\IGSGS\QS\]4GSH jj5       r\IGSGS\QS\]4GSI jj5       r\IGSGS\QS\]4GSJ jj5       r\IGSGS\QS\]4GSK jj5       r\IGSGS\QS\]4GSL jj5       r\IGSGS\QS\]4GSM jj5       r\IGSGS\QS\]4GSN jj5       r\IGSGS\QS\]4GSO jj5       r\IGSGS\QS\]4GSP jj5       r\IGSGS\QS\]4GSQ jj5       r\IGSGS\QS\]4GSR jj5       r\IGSGS\QS\]4GSS jj5       r\IGSGS\QS\]4GST jj5       r\IGSGS\QS\]4GSU jj5       r\IGSGS\QS\]4GSV jj5       r\IGSGS\QS\]4GSW jj5       r\IGSGS\QS\]4GSX jj5       r\IGSGS\QS\]4GSY jj5       r\IGSGS\QS\]4GSZ jj5       r\IGSGS\QS\]4GS[ jj5       r\IGSGS\QS\]4GS\ jj5       r\IGSGS\QS\]4GS] jj5       r\IGSGS\QS\]4GS^ jj5       r\IGSGS\QS\]4GS_ jj5       r\IGSGS\QS\]4GS` jj5       r\IGSGS\QS\]4GSa jj5       r\IGSGS\QS\]4GSb jj5       r\IGSGS\QS\]4GSc jj5       r\IGSGS\QS\]4GSd jj5       r\IGSGS\QS\]4GSe jj5       r\IGSGS\QS\]4GSf jj5       r\IGSGS\QS\]4GSg jj5       r\IGSGS\QS\]4GSh jj5       r\IGSGS\QS\]4GSi jj5       r\IGSGS\QS\]4GSj jj5       r\IGSGS\QS\]4GSk jj5       r\IGSGS\QS\]4GSl jj5       r\IGSGS\QS\]4GSm jj5       r\IGSGS\QS\]4GSn jj5       r\IGSGS\QS\]4GSo jj5       r\IGSGS\QS\]4GSp jj5       r\IGSGS\QS\]4GSq jj5       r\IGSGS\QS\]4GSr jj5       r\IGSGS\QS\]4GSs jj5       r\IGSGS\QS\]4GSt jj5       r\IGSGS\QS\]4GSu jj5       r\IGSGS\QS\]4GSv jj5       r\IGSGS\QS\]4GSw jj5       r\IGSGS\QS\]4GSx jj5       r\IGSGS\QS\]4GSy jj5       r\IGSGS\QS\]4GSz jj5       r\IGSGS\QS\]4GS{ jj5       r\IGSGS\QS\]4GS| jj5       r\IGSGS\QS\]4GS} jj5       r\IGSGS\QS\]4GS~ jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       r\IGSGS\QS\]4GS jj5       Gr \IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr	\IGSGS\QS\]4GS jj5       Gr
\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\IGSGS\QS\]4GS jj5       Gr\J" \M0 GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSS_GSGS_GSGS_GSS_GSS_GSS_GSS_GSS_GSGS_GSGS	GSGS.E5        g! \ a
    SSKJr   GNf = fs  snn f (  a  Vision Transformer (ViT) in PyTorch

A PyTorch implement of Vision Transformers as described in:

'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
    - https://arxiv.org/abs/2010.11929

`How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
    - https://arxiv.org/abs/2106.10270

`FlexiViT: One Model for All Patch Sizes`
    - https://arxiv.org/abs/2212.08013

The official jax code is released and available at
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision

Acknowledgments:
  * The paper authors for releasing code and weights, thanks!
  * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch
  * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
  * Bert reference code checks against Huggingface Transformers and Tensorflow Bert

Hacked together by / Copyright 2020, Ross Wightman
    N)OrderedDict)partial)	AnyCallableDictOptionalSetTupleTypeUnionList)Literal)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)	AttentionDiffAttentionAttentionPoolLatentAttentionPoolPrr
PatchEmbedMlpSwiGLUPackedSwiGLU	LayerNormRmsNormDropPathcalculate_drop_path_ratesPatchDropouttrunc_normal_lecun_normal_resample_patch_embedresample_abs_pos_embeduse_fused_attnget_act_layerget_norm_layermaybe_add_maskresolve_self_attn_mask	LayerType
LayerScale   )build_model_with_cfg)feature_take_indices)named_apply
checkpointcheckpoint_seqadapt_input_conv)generate_default_cfgsregister_modelregister_model_deprecationsVisionTransformer) attndiffT
attn_layerdim	num_headsqkv_biasqk_norm
scale_norm	proj_bias	attn_drop	proj_drop
norm_layerdepthreturnc                     [        U [        5      (       a#  [        R                  U S 5      n U c
   SU  35       e[	        U [
        5      (       a  XS'   U " U4UUUUUUUU	S.UD6$ )NzUnknown attn_layer: rF   )r>   r?   r@   rA   rB   rC   rD   rE   )
isinstancestrATTN_LAYERSget
issubclassr   )r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   kwargss               X/home/wildlama/miniconda3/lib/python3.13/site-packages/timm/models/vision_transformer.py_create_attnrP   \   s     *c"" __Z6
%J)=j\'JJ% *m,,w      c            %       j  ^  \ rS rSrSrSSSSSSSSSS\R                  \\\	SSS4S	\
S
\
S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\R                     S\S\
SS4$U 4S jjjr  S!S\R&                  S\\R&                     S\S\R&                  4S jjrS rU =r$ )"Block   z)Transformer block with pre-normalization.      @FT        Nr   r=   r>   	mlp_ratior?   r@   scale_attn_normscale_mlp_normrB   rD   rC   init_values	drop_path	act_layerrE   	mlp_layerr<   rF   rG   c                 `  > [         TU ]  5         UUS.nU" U40 UD6U l        [        UU4UUUUUU
U	UUS.	UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      O[        R                  " 5       U l
        U" U40 UD6U l        U" SU[        X-  5      UU(       a  UOSUU	S.UD6U l        U(       a  [        U4SU0UD6O[        R                  " 5       U l        US:  a  [        U5      U l        g[        R                  " 5       U l        g)a  Initialize Block.

Args:
    dim: Number of input channels.
    num_heads: Number of attention heads.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: If True, add a learnable bias to query, key, value.
    qk_norm: If True, apply normalization to query and key.
    proj_bias: If True, add bias to output projection.
    proj_drop: Projection dropout rate.
    attn_drop: Attention dropout rate.
    init_values: Initial values for layer scale.
    drop_path: Stochastic depth rate.
    act_layer: Activation layer.
    norm_layer: Normalization layer.
    mlp_layer: MLP layer.
    attn_layer: Attention layer type (class or string).
    depth: Block index, passed to attention layer for depth-dependent init.
devicedtype	r>   r?   r@   rA   rB   rC   rD   rE   rF   rZ   rV   Nin_featureshidden_featuresr\   rE   biasdrop )super__init__norm1rP   r:   r-   nnIdentityls1r    
drop_path1norm2intmlpls2
drop_path2selfr=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   r`   ra   dd	__class__s                        rO   rj   Block.__init__   s;   R 	/*r*
 
  &!
 
	 FQ:cA{AbAVXVaVaVc1:R(9-R[[]*r*
 
0%3z
 
 FQ:cA{AbAVXVaVaVc1:R(9-R[[]rQ   x	attn_mask	is_causalc                    XR                  U R                  U R                  U R                  U5      X#S95      5      -   nXR	                  U R                  U R                  U R                  U5      5      5      5      -   nU$ Nr{   r|   )ro   rn   r:   rk   rt   rs   rr   rp   rv   rz   r{   r|   s       rO   forwardBlock.forward   sc     4::a=I)k lmm$**Q-)@ ABBrQ   )r:   ro   rt   rn   rs   rr   rk   rp   NF__name__
__module____qualname____firstlineno____doc__rl   GELUr   r   r   rq   floatboolr   r   Moduler,   rj   torchTensorr   __static_attributes____classcell__rx   s   @rO   rS   rS      s   3  ""!$)#("!!+/!)+*3),$-)ISIS IS 	IS
 IS IS "IS !IS IS IS IS "%IS IS BIIIS RYYIS  BII!IS" "#IS$ %IS* 
+IS IS\ 15#	||  - 	
 
 rQ   rS   c            %       p  ^  \ rS rSrSSSSSSSSSS\R
                  \\\SSS4S\	S	\	S
\
S\S\S\S\S\S\
S\
S\\
   S\
S\\R                     S\\R                     S\\R                     S\S\	SS4$U 4S jjjrS!S jr  S"S\R&                  S\\R&                     S\S\R&                  4S jjrS rU =r$ )#ResPostBlock   rU   FTrV   Nr   r=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   rG   c                   > [         TU ]  5         UUS.nXl        [        UU4UUUUUU
U	UUS.	UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l	        U" SU[        X-  5      UU(       a  UOS UU	S.UD6U l        U" U40 UD6U l        US:  a  [        U5      O[        R                  " 5       U l        U R                  5         g )Nr_   rb   rV   rc   rh   )ri   rj   rZ   rP   r:   rk   r    rl   rm   ro   rq   rr   rp   rt   init_weightsru   s                        rO   rj   ResPostBlock.__init__   s    , 	/& 
  &!
 
	  *r*
1:R(9-R[[] 
0%3z
 
  *r*
1:R(9-R[[]rQ   c                    U R                   b}  [        R                  R                  U R                  R
                  U R                   5        [        R                  R                  U R                  R
                  U R                   5        g g N)rZ   rl   init	constant_rk   weightrp   rv   s    rO   r   ResPostBlock.init_weights  s[    'GGdjj//1A1ABGGdjj//1A1AB (rQ   rz   r{   r|   c                     XR                  U R                  U R                  XUS95      5      -   nXR                  U R	                  U R                  U5      5      5      -   nU$ r~   )ro   rk   r:   rt   rp   rr   r   s       rO   r   ResPostBlock.forward  sS     

499QW`9+a bcc

488A; 788rQ   )r:   ro   rt   rZ   rr   rk   rp   rG   Nr   )r   r   r   r   rl   r   r   r   r   rq   r   r   r   r   r   r,   rj   r   r   r   r   r   r   r   s   @rO   r   r      sd   
  ""!$)#("!!+/!)+*3),$-)77 7 	7
 7 7 "7 !7 7 7 7 "%7 7 BII7 RYY7  BII!7" "#7$ %7* 
+7 7rC 15#	||  - 	
 
 rQ   r   c            '         ^  \ rS rSr% Sr\\   \S'   SSSSSSSSSS\R                  \
SSS	SSS4S
\S\S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\\R                        S\\   S\S\SS4&U 4S jjjrS$S jr  S%S\R(                  S \\R(                     S!\S\R(                  4S" jjrS#rU =r$ )&ParallelScalingBlocki$  zParallel ViT block (MLP & Attention in parallel)
Based on:
  'Scaling Vision Transformers to 22 Billion Parameters` - https://arxiv.org/abs/2302.05442

fused_attnrU   FTrV   Nr   r=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   fuse_out_projrG   c                 &  > [         TU ]  5         UUS.nX-  S:X  d   S5       eU(       d  U(       a   S5       eX l        X-  U l        U R                  S-  U l        [        5       U l        [        X1-  5      nUSU-  -   nU" U40 UD6U l        [        R                  " UU4SU0UD6U l        U/U/S-  -   U l        U(       a  U R                  SS 5        O0[        R                  " [        R                   " U40 UD65      U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        [        R*                  " U
5      U l        [        R*                  " U	5      U l        U" 5       U l        U(       a1  [        R                  " UU-   U4SU0UD6U l        S U l        S U l        ODS U l        [        R                  " X4SU0UD6U l        [        R                  " UU4SU0UD6U l        Ub  [9        U4S	U0UD6O[        R$                  " 5       U l        US
:  a  [=        U5      O[        R$                  " 5       U l        U RA                  5         g )Nr_   r   $dim should be divisible by num_headsScale norms not supported         rf   mlp_biasrZ   rV   )!ri   rj   r>   head_dimscaler'   r   rq   in_normrl   Linearin_projin_splitregister_parameter	Parameterr   emptyr   rm   q_normk_normDropoutrC   mlp_dropmlp_actout_projattn_out_projmlp_out_projr-   lsr    r[   reset_parameters)rv   r=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   r   r`   ra   rw   mlp_hidden_dimin_proj_out_dimrx   s                           rO   rj   ParallelScalingBlock.__init__+  s$   . 	/!#K%KK#">V;VV9"(]]d*
(*Y_-(1s72!#,,yyoKHKK'(C5194##J5LL^)Jr)JKDM9@j5"5bkkm9@j5"5bkkmI.

9- {IIcN&:CViVSUVDM!%D $D !DM!#3!J)!Jr!JD "		.# TI TQS TDDOD[*S@k@R@acalalan09B),BKKM 	rQ   c                 r    U R                   b*  [        R                  R                  U R                   5        gg)"Initialize parameters and buffers.N)r   rl   r   zeros_r   s    rO   r   %ParallelScalingBlock.reset_parametersn  s%    ==$GGNN4==) %rQ   rz   r{   r|   c           	      8   UR                   u  pEnU R                  U5      nU R                  U5      n[        R                  " XpR
                  SS9u  ppU R                  b  XR                  -   nU R                  U	R                  XEU R                  U R                  5      5      R                  SS5      n	U R                  U
R                  XEU R                  U R                  5      5      R                  SS5      n
UR                  XEU R                  U R                  5      R                  SS5      nU R                  (       a@  [        R                  " XUUU R                   (       a  U R"                  R$                  OSUS9nO[XR&                  -  n	XR                  SS5      -  n[)        X]X#S9n[+        X5      nUR-                  SS9nU R#                  U5      nX-  nUR                  SS5      R/                  XEU5      nU R1                  U5      nU R3                  U5      nU R4                  b&  U R5                  [        R6                  " X4SS95      nO#U R9                  U5      U R;                  U5      -   nXR=                  U R?                  U5      5      -   nU$ )	Nr=   r.      rV   r{   	dropout_pr|   r|   ) shaper   r   r   splitr   r   r   viewr>   r   	transposer   r   Fscaled_dot_product_attentiontrainingrC   pr   r+   r*   softmaxreshaper   r   r   catr   r   r[   r   )rv   rz   r{   r|   BNCyx_mlpqkvx_attnr:   	attn_biass                  rO   r   ParallelScalingBlock.forwards  s    ''a LLOLLOQ2>!==$MM)E KKqT^^T]]CDNNqRSTKKqT^^T]]CDNNqRSTFF17AA!QG??33a#.2mm$..**#	F JJA{{2r**D.q	WI!$2D<<B<'D>>$'DXF!!!Q'//a8 U#e$ ==$eiiR@AA""6*T->->u-EEA twwqz**rQ   )rC   r   r[   r   r   r   r   r   r   r   r   r   r   r   r>   r   r   r   r   r   )r   r   r   r   r   r   r   __annotations__rl   r   r   rq   r   r   r   r   r,   rj   r   r   r   r   r   r   r   s   @rO   r   r   $  s    d  ""!$)#("!!+/!)+*337.2"'+A A  A  	A 
 A  A  "A  !A  A  A  A  "%A  A  BIIA  RYYA    RYY0!A " !+#A $ %A &  'A , 
-A  A F* 15#	1||1  -1 	1
 
1 1rQ   r   c            '         ^  \ rS rSr% Sr\\   \S'   SSSSSSSSSS\R                  \
SSS	SSS4S
\S\S\S\S\S\S\S\S\S\S\\   S\S\\R                     S\\R                     S\\\R                        S\\   S\S\SS4&U 4S jjjrS\4S jrS&S jrS\R*                  4S  jr  S'S!\R*                  S"\\R*                     S#\S\R*                  4S$ jjrS%rU =r$ )(DiffParallelScalingBlocki  a<  Parallel ViT block with Differential Attention (MLP & Attention in parallel).

Combines the parallel MLP+Attention structure from 'Scaling Vision Transformers to
22 Billion Parameters' (https://arxiv.org/abs/2302.05442) with differential attention
from 'Differential Transformer' (https://arxiv.org/abs/2410.05258).
r   rU   FTrV   Nr   r=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   dual_lambdarG   c                   > [         TU ]  5         UUS.nX-  S:X  d   S5       eU(       d  U(       a   S5       eX l        X-  S-  U l        U R                  S-  U l        [        5       U l        [        X1-  5      nUSU-  -   nU" U40 UD6U l        [        R                  " UU4SU0UD6U l        U/U/S-  -   U l        U(       a  U R                  S	S 5        O0[        R                  " [        R                   " U40 UD65      U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        U(       a  U" U R                  40 UD6O[        R$                  " 5       U l        [        R*                  " U
5      U l        Xl        [1        SU R                  -  4S
S0UD6U l        UU l        U(       a  [        R                  " [        R                   " S[        R6                  US95      U l        [        R                  " [        R                   " S[        R6                  US95      U l        S =U l        =U l        =U l         U l!        GO*S =U l        U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l        [        R                  " [        R                   " U R                  [        R6                  US95      U l         [        R                  " [        R                   " U R                  [        R6                  US95      U l!        [        R*                  " U	5      U l"        U" 5       U l#        [        R                  " UU-   U4SU0UD6U l$        Ub  [K        U4SU0UD6O[        R$                  " 5       U l&        US:  a  [O        U5      O[        R$                  " 5       U l(        SU l)        U RU                  U5        U RW                  5         g )Nr_   r   r   r   r   r   r   rf   r   epsh㈵>rh   )ra   r`   rZ   rV   皙?),ri   rj   r>   r   r   r'   r   rq   r   rl   r   r   r   r   r   r   r   r   rm   r   r   r   rC   attn_drop_pr   sub_normr   float32lambda_alambda_b	lambda_q1	lambda_k1	lambda_q2	lambda_k2r   r   r   r-   r   r    r[   lambda_initset_lambda_initr   )rv   r=   r>   rW   r?   r@   rX   rY   rB   rD   rC   rZ   r[   r\   rE   r]   r<   rF   r   r`   ra   rw   r   r   rx   s                           rO   rj   !DiffParallelScalingBlock.__init__  s#   . 	/!#K%KK#">V;VV9"(A-]]d*
(*Y_-(1s72!#,,yyoKHKK'(C5194##J5LL^)Jr)JKDM9@j5"5bkkm9@j5"5bkkmI.$  DMM 1BtBrB&LLRu}}U[)\]DMLLRu}}U[)\]DMPTTDNTT^Tdnt~,00DMDM\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN\\%++dmm5==ag*hiDN

9- { 		#"6R)RrRDOD[*S@k@R@acalalan09B),BKKMU# 	rQ   c                 L    SS[         R                  " SU-  5      -  -
  U l        g )Nr   g333333?g333333ӿ)mathexpr   )rv   rF   s     rO   r   (DiffParallelScalingBlock.set_lambda_init  s!    txxu'=!==rQ   c                    U R                   b)  [        R                  R                  U R                   5        U R                  (       aS  [        R                  R                  U R
                  5        [        R                  R                  U R                  5        g[        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  [        R                  R                  U R                  SSS9  g)r   Nr   皙?meanstd)r   rl   r   r   r   r   r   normal_r   r   r   r   r   s    rO   r   )DiffParallelScalingBlock.reset_parameters   s    ==$GGNN4==)GGNN4==)GGNN4==)GGOODNNO<GGOODNNO<GGOODNNO<GGOODNNO<rQ   c                    U R                   bA  [        R                  " U R                   5      n[        R                  " U R                  5      nO[        R                  " [        R                  " U R
                  U R                  -  SS9R                  5       5      n[        R                  " [        R                  " U R                  U R                  -  SS9R                  5       5      nX-
  U R                  -   $ )Nr   r   )r   r   r   r   sumr   r   r   r   r   r   )rv   lambda_1lambda_2s      rO   _compute_lambda(DiffParallelScalingBlock._compute_lambda  s    ==$yy/Hyy/Hyy4>>DNN+JPR!S!Y!Y![\Hyy4>>DNN+JPR!S!Y!Y![\H"T%5%555rQ   rz   r{   r|   c           	         UR                   u  pEnU R                  U5      nU R                  U5      n[        R                  " XpR
                  SS9u  ppU R                  b  XR                  -   nU	R                  XESU R                  -  U R                  5      R                  SS5      n	U
R                  XESU R                  -  U R                  5      R                  SS5      n
UR                  XEU R                  SU R                  -  5      R                  SS5      nU R                  U	5      U R                  U
5      pU R                  5       R                  U	5      nU R                  (       a  U	R                  X@R                  SXPR                  5      n	U
R                  X@R                  SXPR                  5      n
U	R!                  S5      u  pU
R!                  S5      u  nnU R"                  (       a  U R$                  OSn[&        R(                  " XXUUS9n[&        R(                  " UUXUUS9nUUU-  -
  nOXR*                  -  n	XR                  SS5      -  n[-        UUX#S9n[/        UU5      nUR1                  SS9nU R3                  U5      nUR5                  X@R                  SXU5      nUS S 2S S 2S	4   UUS S 2S S 2S4   -  -
  nUU-  nU R7                  U5      nUSU R8                  -
  -  nUR                  SS5      R                  XEU5      nU R;                  U5      nU R=                  U5      nU R?                  [        R@                  " UU4SS95      nXRC                  U RE                  U5      5      -   nU$ )
Nr   r   r   r.   rV   r   r   r   r   )#r   r   r   r   r   r   r   r   r>   r   r   r   r   r  type_asr   unbindr   r   r   r   r   r+   r*   r   rC   r   r   r   r   r   r   r   r[   r   )rv   rz   r{   r|   r   r   r   r   r   r   r   r   lambda_fullq1q2k1k2r   attn1attn2r   r:   r   s                          rO   r    DiffParallelScalingBlock.forward  s    ''a LLOLLOQ2>!==$MM)E IIaA.>HHANIIaA.>HHANIIaDNNA,=>HHAN{{1~t{{1~1**,44Q7??		!^^Q==AA		!^^Q==AAXXa[FBXXa[FB,0MM((sI2221]fr{|E222r1]fr{|E[500FJJA{{2r**D.q$	WI!$	2D<<B<'D>>$'D99Q18D1a=;aAg#>>DAXFv&1t///0!!!Q'//a8 U#e$ MM%))VUO<= twwqz**rQ   )rC   r   r[   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   rl   r   r   rq   r   r   r   r   r,   rj   r   r   r   r   r  r   r   r   r   s   @rO   r   r     s    d  ""!$)#("!!+/!)+*337.2 %+K K  K  	K 
 K  K  "K  !K  K  K  K  "%K  K  BIIK  RYYK    RYY0!K " !+#K $ %K & 'K , 
-K  K Z>S >=6 6 15#	<||<  -< 	<
 
< <rQ   r   c            '       p  ^  \ rS rSrSrSSSSSSSSSSS\R                  \\\	S	SS4S
\
S\
S\
S\S\S\S\S\S\S\\   S\S\S\S\\R                     S\\R                     S\\R                     S\S\
SS4&U 4S jjjr  S#S\R&                  S\\R&                     S \S\R&                  4S! jjrS"rU =r$ )$ParallelThingsBlockiU  zParallel ViT block (N parallel attention followed by N parallel MLP)
Based on:
  `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
r   rU   FTNrV   r   r=   r>   num_parallelrW   r?   r@   rX   rY   rB   rZ   rD   rC   r[   r\   rE   r]   r<   rF   rG   c                   > UUS.n[         TU ]  5         X0l        [        R                  " 5       U l        [        R                  " 5       U l        [        U5       GHh  nU R
                  R                  [        R                  " [        SU" U40 UD64S[        UU4UUUUU	UUUUS.	UD64SU
(       a  [        U4SU
0UD6O[        R                  " 5       4SUS:  a  [        U5      O[        R                  " 5       4/5      5      5        U R                  R                  [        R                  " [        SU" U40 UD64S	U" U4[        X-  5      UU(       a  UOS U	US
.UD64SU
(       a  [        U4SU
0UD6O[        R                  " 5       4SUS:  a  [        U5      O[        R                  " 5       4/5      5      5        GMk     g )Nr_   normr:   rb   r   rZ   r[   rV   rr   )re   r\   rE   rf   rg   )ri   rj   r  rl   
ModuleListattnsffnsrangeappend
Sequentialr   rP   r-   rm   r    rq   )rv   r=   r>   r  rW   r?   r@   rX   rY   rB   rZ   rD   rC   r[   r\   rE   r]   r<   rF   r`   ra   rw   _rx   s                          rO   rj   ParallelThingsBlock.__init__Z  s   . /(]]_
MMO	|$AJJbmmKC.2./ (%#.''')   z#E;E"EZ\ZeZeZghY^hy1W#9 -  & IIR]];C.2./	$'$8'-;z""   z#E;E"EZ\ZeZeZghY^hy1W8 ,  ) %rQ   rz   r{   r|   c           	      f   Uc  U(       a  / nU R                    HW  nUR                  U5      nUR                  XbUS9nUR                  U5      nUR	                  U5      nUR                  U5        MY     U[        R                  " U5      R                  SS9-   nOFU[        R                  " U R                    Vs/ s H
  oU" U5      PM     sn5      R                  SS9-   nU[        R                  " U R                   Vs/ s H
  ow" U5      PM     sn5      R                  SS9-   nU$ s  snf s  snf )Nr   r   r   )
r  r  r:   r   r[   r  r   stackr  r  )rv   rz   r{   r|   attn_outr:   r   ffns           rO   r   ParallelThingsBlock.forward  s     IH

16)T/' # EKK)--!-44AEKKTZZ @ZTaZ @AEE!ELLAtyy9ySVy9:>>1>EE !A9s   :D)
 D.
)r  r  r  r   r   r   s   @rO   r  r  U  sp    !"!"!$)#("+/!!!)+*3),$-+== = 	=
 = = = "= != = "%= = = = BII=  RYY!=" BII#=$ "%=& '=, 
-= =D 15#	||  - 	
 
 rQ   r  rz   	pool_typenum_prefix_tokensreduce_include_prefixc                 4   U(       d  U $ US:X  a  U S S 2S4   n U $ U(       a  U O
U S S 2US 24   n US:X  a  U R                  SS9n U $ US:X  a$  SU R                  SS9U R                  SS9-   -  n U $ US:X  a  U R                  SS9n U $ U(       a
   S	U 35       eU $ )
Ntokenr   avgr.   r   avgmaxg      ?maxzUnknown pool type )r   amax)rz   r'  r(  r)  s       rO   global_pool_nlcr0    s     GadG H 'AAa1B1C.C,D1A H ("qvv!v}qvv!v}45A H %1A H !B$6yk"BB=HrQ   c            S       P  ^  \ rS rSr% Sr\\   \S'   SSSSSS	S
S
SSSSSSSSSSSSSSSSSSSSSSSSS\SSS\	\
\SS4*S\\\\\4   4   S\\\\\4   4   S\S\S\S   S\S\S\S\S\S\S\S \S!\S"\\   S#\S$\S%\S&\S'\S(\S)\\   S*\S\S+\S,\S-\S.\S/\S0\S1\S2\S3   S4\S5\S6\\   S7\\   S8\\   S9\\R.                     S:\\R.                     S;\S<S4RU 4S= jjjrShS> jrSiS?\S@\S<S4SA jjrSB\R.                  S<S4SC jr\R:                  R=                  5       SjSD\SE\S<S4SF jj5       r\R:                  R<                  S<\ \   4SG j5       r!\R:                  R<                  SkSH\S<\"\\\\#4   4   4SI jj5       r$\R:                  R<                  SlSJ\S<S4SK jj5       r%\R:                  R<                  S<\R.                  4SL j5       r&SmS\S\\   S<S4SM jjr'  SnS\\\\4      S\\\\4      S<S4SN jjr(SO\RR                  S<\RR                  4SP jr*         SoSO\RR                  SQ\\\\#\   4      SR\SS\ST\SU\SV\SW\SX\\RR                     SY\S<\\#\RR                     \\RR                  \#\RR                     4   \"\\+4   4   4SZ jjr,   SpSQ\\\#\   4   S[\S\\S<\#\   4S] jjr-     SqSO\RR                  S^\\\#\   \\   4   S_\SR\SS\SX\\RR                     S<\#\RR                     4S` jjr.  SrSO\RR                  SX\\RR                     SY\S<\RR                  4Sa jjr/SmSO\RR                  Sb\\   S<\RR                  4Sc jjr0SkSO\RR                  Sd\S<\RR                  4Se jjr1  SrSO\RR                  SX\\RR                     SY\S<\RR                  4Sf jjr2Sgr3U =r4$ )sr8   i  zVision Transformer

A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
    - https://arxiv.org/abs/2010.11929
dynamic_img_size      r     r+        rU   TFNlearnr   rV   r9   img_size
patch_sizein_chansnum_classesglobal_poolr9   r,  r-  r.  r+  mapprr	embed_dimrF   r>   rW   r?   r@   rX   rY   rB   rZ   class_token	pos_embedno_embed_class
reg_tokenspre_norm
final_normfc_normpool_include_prefixdynamic_img_pad	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)skipresetjaxjax_nlhbmocor9   fix_initembed_layerembed_norm_layerrE   r\   block_fnr]   r<   rG   c+                 	  > [         T3U ]  5         U)U*S.n+US;   d   eU(       d  US:w  d   eUS;   d   eUc  US;   OUn,[        U$5      =(       d    [        n$[        U#5      n#[	        U%5      =(       d    [
        R                  n%X@l        X0l        XPl	        U=U l
        =U l        U l        U(       a  SOSU l        U =R                  U-  sl        UU l        UU l        UU l        UU l        UU l        S	U l        0 n-U(       a  U-R)                  [+        S	S
S95        U#b  U#U-S'   U"" S-UUUUU(       + US.U-DU+D6U l        U R,                  R.                  n.[1        U R,                  S5      (       a  U R,                  R3                  5       OUn/U(       a-  [
        R4                  " [6        R8                  " SSU40 U+D65      OSU l        U(       a-  [
        R4                  " [6        R8                  " SUU40 U+D65      OSU l        U(       a  U.OU.U R                  -   n0U(       a  US:X  a  SU l        O2[
        R4                  " [6        R8                  " SU0U40 U+D65      U l        [
        R@                  " US9U l!        US:  a  [E        UU R                  S9U l#        O[
        RH                  " 5       U l#        U(       a	  U$" U40 U+D6O[
        RH                  " 5       U l%        [M        UU5      n1[
        RN                  " [Q        U5       V2s/ s HA  n2U&" S-0 SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SU1U2   _SU$_SU%_SU'_S U(_S!U2_U+D6PMC     sn26 U l)        [Q        U5       V2s/ s H  n2[+        S"U2 3UU/S#9PM     sn2U l*        U(       a  U,(       d	  U$" U40 U+D6O[
        RH                  " 5       U l+        US$:X  a!  [Y        U R                  4UU	U$U%S%.U+D6U l-        O=US&:X  a0  []        U R                  4UU(       a  SOS'U$S(.U+D6U l-        S)U l        OSU l-        U(       a  U,(       a	  U$" U40 U+D6O[
        RH                  " 5       U l/        [
        R@                  " U5      U l0        US:  a"  [
        Rb                  " U R                  U40 U+D6O[
        RH                  " 5       U l2        U S*:X  a  S+OU U l3        U!U l4        U S*:w  a  U Rk                  S	S,9  ggs  sn2f s  sn2f ).a  
Args:
    img_size: Input image size.
    patch_size: Patch size.
    in_chans: Number of image input channels.
    num_classes: Number of classes for classification head.
    global_pool: Type of global pooling for final sequence (default: 'token').
    embed_dim: Transformer embedding dimension.
    depth: Depth of transformer.
    num_heads: Number of attention heads.
    mlp_ratio: Ratio of mlp hidden dim to embedding dim.
    qkv_bias: Enable bias for qkv projections if True.
    init_values: Layer-scale init values (layer-scale enabled if not None).
    class_token: Use class token.
    no_embed_class: Don't include position embeddings for class (or reg) tokens.
    reg_tokens: Number of register tokens.
    pre_norm: Enable norm after embeddings, before transformer blocks (standard in CLIP ViT).
    final_norm: Enable norm after transformer blocks, before head (standard in most ViT).
    fc_norm: Move final norm after pool (instead of before), if None, enabled when global_pool == 'avg'.
    drop_rate: Head dropout rate.
    pos_drop_rate: Position embedding dropout rate.
    attn_drop_rate: Attention dropout rate.
    drop_path_rate: Stochastic depth rate.
    weight_init: Weight initialization scheme.
    fix_init: Apply weight initialization fix (scaling w/ layer index).
    embed_layer: Patch embedding layer.
    embed_norm_layer: Normalization layer to use / override in patch embed module.
    norm_layer: Normalization layer.
    act_layer: MLP activation layer.
    block_fn: Transformer block layer.
r_   r>  r+  )r9   noner8  N)r,  r-  r.  r.   r   FNHWC)strict_img_size
output_fmtrE   )r9  r:  r;  rA  rf   rJ  
feat_ratior\  )r   )r(  r=   r>   rW   r?   r@   rX   rY   rB   rZ   rD   rC   r[   r\   r]   r<   rF   blocks.)modulenum_chs	reductionr?  )r>   rW   rE   r\   r@  r,  )r>   r'  rE   TrR  rS  needs_resetrh   )6ri   rj   r)   r   r(   rl   r   r<  r;  r=  num_featureshead_hidden_sizerA  r(  num_reg_tokenshas_class_tokenrD  rI  r2  grad_checkpointingupdatedictpatch_embednum_patcheshasattrr`  r   r   r   	cls_token	reg_tokenrC  r   pos_dropr"   
patch_droprm   norm_prer!   r  r  blocksfeature_infor  r   	attn_poolr   rH  	head_dropr   headweight_init_moderW  r   )4rv   r9  r:  r;  r<  r=  rA  rF   r>   rW   r?   r@   rX   rY   rB   rZ   rB  rC  rD  rE  rF  rG  rH  rI  r2  rJ  rK  rL  rM  rN  rO  rP  rQ  rW  rX  rY  rE   r\   rZ  r]   r<   r`   ra   rw   use_fc_norm
embed_argsro  rd  	embed_lendprirx   s4                                                      rO   rj   VisionTransformer.__init__  s   X 	/QQQQkW4441111AHk%==V]#J/<9
)*:;!),7	& &ENNND1DN&1q*,(*,#6  0"'
d5VLM''7J|$& 	
!+	
 	
 	
 &&225<T=M=M|5\5\D$$//1bl	MXekk!Q	&HR&HI^bV`ekk!Z&Qb&QRfj#1K{TE[E[7[	I/!DN\\%++aI*TQS*TUDN

]3Q*"&"8"8DO
 !kkmDO7?
933R[[]'>mm* 5\+&#* ")  # $ "	
   !0  . $ ( ) ) a& & $ $  &!" %( "+&# $. Y^^cXdfXdSTD'!yINXdf3=kJy/B/WYWbWbWd	 %0##%# DN E!-#%0'e%	
 DN (,D$!DN6@[z)2r2VXVaVaVcI.DORSOBIIdnnk@R@Y[YdYdYf	+6&+@k & %0 !m&#,fs   AS$Sc                    [         R                  " 5          [        U R                  5       H  u  p[        R
                  " SUS-   -  5      nUR                  R                  R                  R                  U5        UR                  R                  R                  R                  U5        M     SSS5        g! , (       d  f       g= f)z9Apply weight initialization fix (scaling w/ layer index).g       @r.   N)r   no_grad	enumeraterv  r   sqrtr:   projr   div_rr   fc2)rv   layer_idlayerr   s       rO   fix_init_weight!VisionTransformer.fix_init_weight  sy    ]]_#,T[[#9		#A"67

&&++E2		$$))%0 $: __s   BB88
Cmoderf  c                    U=(       d    U R                   nUS;   d   eSU;   a!  [        R                  " U R                  5      * OSnU R                  b  [        U R                  SS9  U R                  b(  [        R                  R                  U R                  SS9  U R                  b(  [        R                  R                  U R                  SS9  [        [        XUS9U 5        U R                  (       a  U R                  5         gg)	a  Initialize model weights.

Args:
    mode: Weight initialization mode ('jax', 'jax_nlhb', 'moco', or '').
    needs_reset: If True, call reset_parameters() on modules that have it.
        Set to False when modules have already self-initialized in __init__.
)rT  rU  rV  rS  r9   nlhbrV   N{Gz?r  ư>re  )r{  r   logr<  rC  r#   rq  rl   r   r  rr  r1   get_init_weights_vitrW  r  )rv   r  rf  	head_biass       rO   r   VisionTransformer.init_weights  s     ,t,,????39T>TXXd..//r	>>%$..c2>>%GGOODNNO5>>%GGOODNNO5(kRTXY==  " rQ   mc                     [        U5        g)z>Initialize weights for a single module (compatibility method).N)init_weights_vit_timm)rv   r  s     rO   _init_weightsVisionTransformer._init_weights  s     	a rQ   checkpoint_pathprefixc                     [        XU5        g)zqLoad pretrained weights.

Args:
    checkpoint_path: Path to checkpoint.
    prefix: Prefix for state dict keys.
N)_load_weights)rv   r  r  s      rO   load_pretrained!VisionTransformer.load_pretrained  s     	dV4rQ   c                 
    1 Sk$ )z3Set of parameters that should not use weight decay.>   rq  rC  
dist_tokenrh   r   s    rO   no_weight_decay!VisionTransformer.no_weight_decay  s
     87rQ   coarsec                     [        SSS/S9$ )zCreate regex patterns for parameter grouping.

Args:
    coarse: Use coarse grouping.

Returns:
    Dictionary mapping group names to regex patterns.
z ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemrv  )rm  )rv   r  s     rO   group_matcherVisionTransformer.group_matcher  s     4-/CD
 	
rQ   enablec                 ~    Xl         [        U R                  S5      (       a  U R                  R                  U5        gg)zgEnable or disable gradient checkpointing.

Args:
    enable: Whether to enable gradient checkpointing.
set_grad_checkpointingN)rk  rp  rn  r  )rv   r  s     rO   r  (VisionTransformer.set_grad_checkpointing  s7     #)4##%=>>33F; ?rQ   c                     U R                   $ )zGet the classifier head.)rz  r   s    rO   get_classifier VisionTransformer.get_classifier  s     yyrQ   c                 d   Xl         Ub`  US;   d   eUS;   a  U R                  c   S5       eUS;  a  U R                  b  SU l        OUS;   a  U R                  U:w  a   S5       eX l        US:  a'  [        R                  " U R
                  U5      U l        g[        R                  " 5       U l        g)zReset the classifier head.

Args:
    num_classes: Number of classes for new classifier.
    global_pool: Global pooling type.
Nr>  )r?  r@  z=Cannot currently add attention pooling in reset_classifier().zECannot currently change attention pooling type in reset_classifier().r   )r<  rx  r=  rl   r   rA  rm   rz  )rv   r<  r=  s      rO   reset_classifier"VisionTransformer.reset_classifier  s     '""UUUUn,1G]]]uN2t~~7Q!%.43C3C{3Reeeu*>IAoBIIdnnk:	SUS^S^S`	rQ   c           
         U R                   R                  nU R                   R                  XS9  U R                  b  U R                  (       a  SOU R
                  nU R                   R                  U-   nXPR                  R                  S   :w  aE  [        R                  " [        U R                  U R                   R                  UUSS95      U l        ggg)zUpdate the input image resolution and patch size.

Args:
    img_size: New input resolution, if None current resolution is used.
    patch_size: New patch size, if None existing patch size is used.
)r9  r:  Nr   r.   T)new_sizeold_sizer(  verbose)rn  	grid_sizeset_input_sizerC  rD  r(  ro  r   rl   r   r&   )rv   r9  r:  prev_grid_sizer(  num_new_tokenss         rO   r   VisionTransformer.set_input_size  s     ))33'''Q>>%%)%8%8d>T>T!--99<MMN!5!5a!88!#.DNN!--77+&7 / " 9 &rQ   rz   c           	         / nU R                   b9  UR                  U R                   R                  UR                  S   SS5      5        U R                  b9  UR                  U R                  R                  UR                  S   SS5      5        U R
                  cC  [        R                  " X!R                  UR                  S   SUR                  S   5      /-   SS9$ U R                  (       am  UR                  u  p4pVU R                  R                  n[        U R
                  XE4UU R                  (       a  SOU R                  S9nUR                  USU5      nOU R
                  nU R                  (       a$  X-   nU(       a  [        R                  " X!/-   SS9nO#U(       a  [        R                  " X!/-   SS9nX-   nU R                  U5      $ )z$Apply positional embedding to input.r   r   r.   r   )r  r  r(  )rq  r  expandr   rr  rC  r   r   r   r2  rn  r  r&   rD  r(  rs  )	rv   rz   to_catr   HWr   r  rC  s	            rO   
_pos_embedVisionTransformer._pos_embed  sq   >>%MM$..//
BCD>>%MM$..//
BCD>>!99Vvvaggaj"aggbk'J&KKQRSS  JA!!--77N.''+':':!@V@V	I q"a AI AIIfsl2 IIfsl2A}}QrQ   indicesreturn_prefix_tokensr  
stop_earlyr_  intermediates_onlyoutput_dictr{   r|   c           	         US;   d   S5       eUS:H  n/ n[        [        U R                  5      U5      u  pUR                  u  nnnnU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      n[        R                  R                  5       (       d  U(       d  U R                  nOU R                  SUS-    n[        U5       H  u  nnU	c  U
(       a  U" XU
S9nOIU R                  (       a0  [        R                  R                  5       (       d  [        UU5      nOU" U5      nUU;   d  Mi  UR                  U(       a  U R                  U5      OU5        M     U R                   (       aK  U Vs/ s H  nUSS2SU R                   24   PM     nnU Vs/ s H  nUSS2U R                   S24   PM     nnOSnU(       ad  U R                  R#                  UU45      u  nnU Vs/ s H7  nUR%                  UUUS5      R'                  SS	SS
5      R)                  5       PM9     nnU(       a5  0 nUUS'   Ub  U(       a  UUS'   U(       d  U R                  U5      nUUS'   U$ [        R                  R                  5       (       d  U(       a  Ub  [+        [-        UU5      5      nU(       a  U$ U R                  U5      nX4$ s  snf s  snf s  snf )a  Forward features that returns intermediates.

Args:
    x: Input image tensor
    indices: Take last n blocks if int, all if None, select matching indices if sequence
    return_prefix_tokens: Return both prefix and spatial intermediate tokens
    norm: Apply norm layer to all intermediates
    stop_early: Stop iterating over blocks when last desired intermediate hit
    output_fmt: Shape of intermediate feature outputs
    intermediates_only: Only return intermediate features
    output_dict: Return outputs as a dictionary with 'image_features' and 'image_intermediates' keys
    attn_mask: Optional attention mask for masked attention (e.g., for NaFlex)
    is_causal: If True, use causal (autoregressive) masking in attention
Returns:
    A tuple with (final_features, intermediates), a list of intermediate features, or a dictionary containing
    'image_features' and 'image_intermediates' (and optionally 'image_intermediates_prefix')
)NCHWNLCz)Output format must be one of NCHW or NLC.r  Nr.   r   r   r   r   r   image_intermediatesimage_intermediates_prefiximage_features)r0   lenrv  r   rn  r  rt  ru  r   jitis_scriptingr  rk  r2   r  r  r(  dynamic_feat_sizer   permute
contiguouslistzip)rv   rz   r  r  r  r  r_  r  r  r{   r|   r   intermediatestake_indices	max_indexr   r   heightwidthrv  r  blkr   prefix_tokensr  r  result_dictx_finals                               rO   forward_intermediates'VisionTransformer.forward_intermediates5  s   < _,Y.YY,&"6s4;;7G"Q  gg1feQOOAOOAMM!99!!##:[[F[[)a-0F'FAs$	)D((1G1G1I1IsA&FL $$TTYYq\qA ( !!ERS]Qq!D$:$:"::;]MSDQRMqQq$"8"8"99:MMRM M##55vuoFDAq^kl^kYZQYYq!Q3;;Aq!QGRRT^kMl K1>K-.(-A<I89 &))A,07,- yy%%'',@]E^ ]M!BCM  IIaLG TR ms   
K/K>>K#
prune_norm
prune_headc                    [        [        U R                  5      U5      u  pEU R                  SUS-    U l        U(       a  [        R                  " 5       U l        U(       a,  [        R                  " 5       U l        U R                  SS5        U$ )a  Prune layers not required for specified intermediates.

Args:
    indices: Indices of intermediate layers to keep.
    prune_norm: Whether to prune normalization layer.
    prune_head: Whether to prune the classifier head.

Returns:
    List of indices that were kept.
Nr.   r   r9   )r0   r  rv  rl   rm   r  rH  r  )rv   r  r  r  r  r  s         rO   prune_intermediate_layers+VisionTransformer.prune_intermediate_layers  sh      #7s4;;7G"Qkk.9q=1DI;;=DL!!!R(rQ   nr   c           
      <    U R                  XUUU(       a  SOSSUS9$ )ax  Get intermediate layer outputs (DINO interface compatibility).

NOTE: This API is for backwards compat, favour using forward_intermediates() directly.

Args:
    x: Input tensor.
    n: Number or indices of layers.
    reshape: Reshape to NCHW format.
    return_prefix_tokens: Return prefix tokens.
    norm: Apply normalization.

Returns:
    List of intermediate features.
r  r  T)r  r  r_  r  r{   )r  )rv   rz   r  r   r  r  r{   s          rO   get_intermediate_layers)VisionTransformer.get_intermediate_layers  s2    . ))!5!(ve# * 
 	
rQ   c                    U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUc  U(       a  U R                   H
  nU" XUS9nM     O\U R
                  (       a:  [        R                  R                  5       (       d  [        U R                  U5      nOU R	                  U5      nU R                  U5      nU$ )z\Forward pass through feature layers (embeddings, transformer blocks, post-transformer norm).r   )rn  r  rt  ru  rv  rk  r   r  r  r3   r  )rv   rz   r{   r|   r  s        rO   forward_features"VisionTransformer.forward_features  s     QOOAOOAMM! I{{)D #$$UYY-C-C-E-Et{{A.AAAIIaLrQ   r'  c                     U R                   b9  U R                  (       d  USS2U R                  S24   nU R                  U5      nU$ Uc  U R                  OUn[	        UUU R                  U R                  S9nU$ )zApply pooling to feature tokens.

Args:
    x: Feature tensor.
    pool_type: Pooling type override.

Returns:
    Pooled features.
N)r'  r(  r)  )rx  rI  r(  r=  r0  )rv   rz   r'  s      rO   poolVisionTransformer.pool  s}     >>%++a//001q!AH(1(9D$$y	"44"&":":	
 rQ   
pre_logitsc                     U R                  U5      nU R                  U5      nU R                  U5      nU(       a  U$ U R                  U5      $ )zForward pass through classifier head.

Args:
    x: Feature tensor.
    pre_logits: Return features before final classifier.

Returns:
    Output tensor.
)r  rH  ry  rz  )rv   rz   r  s      rO   forward_headVisionTransformer.forward_head  sA     IIaLLLONN1q0DIIaL0rQ   c                 H    U R                  XUS9nU R                  U5      nU$ r~   )r  r  r   s       rO   r   VisionTransformer.forward  s.     !!!I!Na rQ   )rx  rv  rq  r2  rA  rH  rw  rW  r=  rk  rj  rz  ry  rh  r;  rD  r  ru  r<  rg  r(  ri  rt  rn  rI  rs  rC  rr  r{  r   r9   Tr9   F)Tr   )NN)	NFFFr  FFNF)r.   FT)r.   FFFNr   )5r   r   r   r   r   r   r   r   r   rS   r   r   r   rq   r
   r   r   r   rJ   r   r,   r   rl   r   rj   r  r   r  r   r  ignorer  r	   r  r   r   r  r  r  r  r  r   r  r   r  r  r  r  r  r  r   r   r   r   s   @rO   r8   r8     s   
 Dk! 5868#V] !!!$)#("+/ $$#("#&*(-%*$)!#%%'$&$&$&SU"$.48.2-1(-),$-W1CsCx011 c5c?231 	1
 1 !!RS1 1 1 1 1 1 1 "1 !1 1  "%!1" #1$ %1& !'1( )1* +1, -1. d^/10 "&112 #314 "516 718 !91: #;1< "=1> "?1@ "A1B !!OPC1D E1F "G1H 'y1I1J !+K1L  	*M1N 299oO1P BIIQ1R "S1X 
Y1 1B1# # # #.!ryy !T !
 YY5s 5C 5 5 5 YY8S 8 8 YY
D 
T#uS$Y?O:O5P 
 
 YY<T <T < < YY		  aC ahsm aW[ a* 3748uS#X/ !sCx1 
	2% ELL % U\\ % T 8<).$$', %04#_ ||_  eCcN34_  #'	_ 
 _  _  _  !%_  _   -_  _  
tELL!5tELL7I)I#JDQTVYQYNZ	[_ F ./$#	3S	>*  	
 
c8 45!).04
||
 S$s)U3Z/0
 	

 #'
 
  -
 
ell	
F 15#	||  - 	
 
0ell x}  01ell 1 1 1$ 15#	||  - 	
 
 rQ   r9   rb  namerf  c                 r   [        U [        R                  5      (       aL  [        U R                  SS9  U R
                  b*  [        R                  R                  U R
                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)zViT weight initialization, original timm impl (for reproducibility).

Args:
    module: Module to initialize.
    name: Module name for context.
    needs_reset: If True, call reset_parameters() on modules that have it.
r  r  Nr   r   )rI   rl   r   r#   r   rf   r   r   rp  r   r   rb  r  rf  s      rO   r  r    s     &"))$$fmm-;;"GGNN6;;' #		(	(	);<<! =rQ   r  c                    [        U [        R                  5      (       a  UR                  S5      (       aT  [        R                  R                  U R                  5        [        R                  R                  U R                  U5        g[        R                  R                  U R                  5        U R                  bX  SU;   a(  [        R                  R                  U R                  SS9O([        R                  R                  U R                  5        gg[        U [        R                  5      (       aM  [        U R                  5        U R                  b*  [        R                  R                  U R                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)zViT weight initialization, matching JAX (Flax) impl.

Args:
    module: Module to initialize.
    name: Module name for context.
    head_bias: Bias value for head layer.
    needs_reset: If True, call reset_parameters() on modules that have it.
rz  Nrr   r  r  r   r   )rI   rl   r   
startswithr   r   r   r   rf   xavier_uniform_r  Conv2dr$   rp  r   r   )rb  r  r  rf  s       rO   init_weights_vit_jaxr  +  s&    &"))$$??6""GGNN6==)GGfkk95GG##FMM2{{&:?4-6RWW^^\b\g\gMh '	FBII	&	&fmm$;;"GGNN6;;' #		(	(	);<<! =rQ   c                    [        U [        R                  5      (       a  SU;   a  [        R                  " S[        U R                  R                  S   S-  U R                  R                  S   -   5      -  5      n[        R                  R                  U R                  U* U5        O)[        R                  R                  U R                  5        U R                  b*  [        R                  R                  U R                  5        gg[        U S5      (       a  U R                  5         gU(       a#  [        U S5      (       a  U R                  5         ggg)	zViT weight initialization, matching moco-v3 impl minus fixed PatchEmbed.

Args:
    module: Module to initialize.
    name: Module name for context.
    needs_reset: If True, call reset_parameters() on modules that have it.
qkvg      @r   r   r.   Nr   r   )rI   rl   r   r   r  r   r   r   r   uniform_r  rf   r   rp  r   r   )rb  r  rf  vals       rO   init_weights_vit_mocor  K  s     &"))$$D=))Bv}}':':1'='BV]]EXEXYZE['[!\\]CGGV]]SD#6GG##FMM2;;"GGNN6;;' #		(	(	);<<! =rQ   c                 X    U(       a#  [        U S5      (       a  U R                  5         g g g )Nr   )rp  r   r  s      rO   init_weights_reset_parametersr
  b  s%    wv'9::! ;{rQ   r  c                     U R                  S5      (       a  [        [        XS9$ U R                  S5      (       a  [        [        US9$ U S:X  a  [        [        US9$ [        [
        US9$ )NrT  )r  rf  rV  re  rS  )r   r   r  r  r
  r  )r  r  rf  s      rO   r  r  g  sa    u+yZZ		 	 ,+FF	4+NN ,+FFrQ   rh   posemb
posemb_newgs_newinterpolation	antialiasc           
         UR                   S   U-
  nU R                   S   U-
  n[        [        R                  " U5      5      /S-  n[	        U5      (       d#  [        [        R                  " U5      5      /S-  n[        XUUUUSS9$ )zRescale the grid of position embeddings when loading from state_dict.
*DEPRECATED* This function is being deprecated in favour of using resample_abs_pos_embed
r.   r   T)r(  r  r  r  )r   rq   r   r  r  r&   )	r  r  r(  r  r  r  ntok_newntok_oldgs_olds	            rO   resize_pos_embedr  t  s     "%66H||A!22H$))H%&'!+Fv;;dii)*+a/!+# rQ   modelr  r  load_bfloat16c                   ^^^^ SSK mT(       a
  SSKJ m  SSKmSIUUUU4S jjnT(       a  TR                  U5      nOTR                  U5      nSnSnSnU(       d  SU;   a  SnOS	U;   a  S
nSnO
SU;   a  SnSn[	        U R
                  S5      (       Ga  U R
                  R                  n	[	        U	S5      (       + n
U
(       a  U	OU	R                  nUR                  R                  R                  [        UR                  R                  R                  S   U" XR S3   5      5      5        UR                  R                  R                  U" XR S3   5      5        UR                  R                  R                  U" XR S3   5      5        U
(       Gd  [        U	R                   5       GH  u  p[        UR"                  5       GH  u  pU SUS-    SUS-    S3n[%        S5       H  n['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        ['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        ['        USUS-    35      R                  R                  U" UU SUS-    S3   5      5        M     UR(                  c  M  UR(                  R                  R                  R                  U" UU S3   5      5        UR(                  R                  R                  R                  U" UU S3   5      5        UR(                  R                  R                  R                  U" UU S3   5      5        GM     GM     U" XR S 3   5      nOB[        U R
                  R*                  R                  R                  S   U" XR S 3   5      5      nUR                  S!S U R
                  R*                  R                  R                  S!S :w  a8  [-        UU R
                  R*                  R                  R                  S!S UUSS"9nU R
                  R*                  R                  R                  U5        U R
                  R*                  R                  R                  U" XR S#3   5      5        U R.                  b%  U R.                  R                  U" XR S$3   SS%95        U(       a  U" XR S&3   SS%9nOU" XR S'3   SS%9nUR                  U R0                  R                  :w  aC  ['        U S(S5      (       a  SO['        U S)S5      n[3        UU R
                  R4                  UUUSS*9nU R0                  R                  U5        U R                  R                  R                  U" XR S+3   5      5        U R                  R                  R                  U" XR S,3   5      5        [7        U R8                  [:        R<                  5      (       a  U S-3U;   a  U R8                  R                  R                  S   XR S-3   R                  S.   :X  a`  U R8                  R                  R                  U" XR S/3   5      5        U R8                  R                  R                  U" XR S-3   5      5        [7        U R>                  [@        5      (       Gai  U S03nUS1-   nU R>                  RB                  R                  U" UU S23   SS%95        U R>                  RD                  R                  R                  [F        RH                  " S3 Vs/ s H+  nU" UU U S3   SS%9RK                  S5      RL                  PM-     sn5      5        U R>                  RD                  R                  R                  [F        RH                  " S3 Vs/ s H!  nU" UU U S3   SS%9RO                  S.5      PM#     sn5      5        U R>                  RP                  R                  R                  U" UU S43   SS%9RK                  S5      RL                  5        U R>                  RP                  R                  R                  U" UU S53   SS%9RO                  S.5      5        U R>                  R*                  R                  R                  U" UU S63   5      RK                  S5      5        U R>                  R*                  R                  R                  U" UU S73   5      5        U R>                  R                  R                  R                  U" UU S83   5      5        U R>                  R                  R                  R                  U" UU S93   5      5        [%        S:5       H  n['        U R>                  RR                  S;US-    35      R                  R                  U" UU S<U S3   5      5        ['        U R>                  RR                  S;US-    35      R                  R                  U" UU S<U S3   5      5        M     U(       a  S=OS>u  nnn[        U R"                  RU                  5       5       GH  u  pU S?3U;   a  U S@3nUnO
U SAU S3nSnUSBU S3-   nURV                  R                  R                  U" UU S83   USC95        URV                  R                  R                  U" UU S93   USC95        URX                  RZ                  R                  R                  [F        RH                  " SD Vs/ s H,  nU" UU U S3   SUSE9RK                  S5      RL                  PM.     sn5      5        URX                  RZ                  R                  R                  [F        RH                  " SD Vs/ s H"  nU" UU U S3   SUSE9RO                  S.5      PM$     sn5      5        URX                  R*                  R                  R                  U" UU S63   USC9RK                  S5      5        URX                  R*                  R                  R                  U" UU S73   USC95        UR\                  R                  R                  U" UU SFU S3   USC95        UR\                  R                  R                  U" UU SFU S3   USC95        [%        S:5       H  n['        URR                  S;US-    35      R                  R                  U" UU SGU SHU S3   USC95        ['        URR                  S;US-    35      R                  R                  U" UU SGU SHU S3   USC95        M     GM      gs  snf s  snf s  snf s  snf )JzULoad weights from .npz checkpoints for official Google Brain Flax implementation
    r   NTc                 z  > Ub  X   n T(       aE  U R                  TR                  5      R                  TR                  5      n TR	                  U 5      n U R
                  S:X  aN  U R                  S   U R                  S   s=:X  a  U R                  S   s=:X  a  S:X  a  O  OU R                  5       n U(       ak  U R
                  S:X  a  U R                  / SQ5      n OGU R
                  S:X  a  U R                  / SQ5      n O#U R
                  S:X  a  U R                  SS/5      n [        R                  " U 5      n U $ )N   r   r.   r   )r   r   r   r.   r   )r   r   r.   )r   bfloat16astyper   arrayndimr   flattenr   r   
from_numpy)_wtidxjnpr  	ml_dtypesnps      rO   _n2p_load_weights.<locals>._n2p  s    ?B++,33CKK@B"B77a<BHHQK288A;J"((1+JJBww!|\\,/A\\),A\\1a&)b!	rQ   bilinearFzopt/target/embedding/kernelzopt/target/zparams/embedding/kernelzparams/zparams/img/embedding/kernelzparams/img/backboner  r.   zconv_root/kernelzgn_root/scalezgn_root/biasblockz/unit/r   convz/kernelr  gnz/scalez/biaszconv_proj/kernelzgn_proj/scalezgn_proj/biaszembedding/kernelr   r  r  r  zembedding/biascls)r"  pos_embeddingz(Transformer/posembed_input/pos_embeddingrD  r(  r  r(  r  r  r  zTransformer/encoder_norm/scalezTransformer/encoder_norm/biasz	head/biasr   zhead/kernelz
MAPHead_0/zMultiHeadDotProductAttention_0/probe)keyvaluezquery/kernelz
query/biasz
out/kernelzout/biaszLayerNorm_0/scalezLayerNorm_0/biasr   fczMlpBlock_0/Dense_)r   r   r.   )r.   r   r   z*Transformer/encoderblock/LayerNorm_0/scalezTransformer/encoderblock/zTransformer/encoderblock_MultiHeadDotProductAttention_)r#  )queryr4  r5  )r"  r#  
LayerNorm_	MlpBlock_z/Dense_)TN)/numpy	jax.numpyr%  loadrp  rn  r*  r  r-  r   copy_r4   r   r  rf   r  stagesrv  r  getattr
downsampler  r%   rq  rC  r&   r  rI   rz  rl   r   rx  r   latentkvr   r   r  Tr   r   rr   childrenrk   r:   r  rp   )r  r  r  r  r'  wr  r  
big_visionr*  	stem_onlyr  r  stagejr+  bprembed_conv_wpos_embed_wr(  block_prefix
mha_prefixr  mha_subb_subln1_subr#  r$  r%  r&  s      `                        @@@rO   r  r    sB     * HH_%GGO$MIJ(A-"F&!+FJ*a/"FJu  *--$$--&11	$x(--		/		0@0@0F0Fq0I4PQT\\lRmPnKopq		tA&>$?@A		T!hl$;"<=>%hoo6 )%,, 7HA"85QuQUG1=B"1Xa!eW~6==CCDbTQUVWZ[V[U\\cKdIeDfga!eW~6==CCDbTQSTUXYTYSZZ`KaIbDcda!eW~6;;AA$qB4rRSVWRWQXX]I^G_B`a & ''3((--44::4RDHXBY@Z;[\((--44::4RDBV@W;XY((--2288a2$l@S>T9UV !8 7 A(89:;'""))//2DXEU;V9W4XZ"#%"3"3"8"8"?"?"E"Ebc"JJ+""))//4'
 
!!''5	%%d1x~-F+G&HI"d1xs^#4>?1x}56%@1x'OPQUZ[EOO111!(0@%!H!HAgV[]prsNt,&&00/'
 
OO+&	JJDX-K#L!MNO	JJOO$q8+H!IJKL5::ryy))hi A%JJOO!!!$HI*>(?(E(Eb(II

Q'=%> ?@

d1xy%9#:;<
 %//#677 ,!&EF
$$T!|nE,B*Cu%MN!!''		N^3`N^Dj\!G,-7??BDDN^3` )a 	b%%eiiK[1]K[aDj\!E*+u5==bAK[1] '^ 	_  &&tAL.I,Je'T'\'\]^'_'a'ab$$T!zl*,E*F%%P%X%XY[%\]##))$qJ<z1J/K*L*T*TUV*WX!!''Q*X/F-G(HI##))$qL>AR1S/T*UV!!''Q,?O/P-Q(RSqAEOO''2a!eW6==CCDl^[lmnloovKwIxDyzEOO''2a!eW6;;AA$qL>YjkljmmrIsGtBuv  ,6i9GUGell3356X?@AE$X%>?LC$X%>qcCLC!&CG9A$NN
  a<.8I(J&KQT!UVtA6F&G$HcRS

##EIIWp/rWpRSDj\!G,-C@HHKMMWp/r %s 	t

!!%))Tm-oTmqDj\!E*+u#>FFrJTm-o #p 	q

$$T!zl*,E*FC%P%X%XYZ%[\

""4ZL*A(B#LM  a<.
7)6(R&SY\!]^tAj	&O$PVYZ[qAEIIAE7|,3399Q,ywqcIJPSTVEIIAE7|,1177Q,ywqcGHcRT % 73`1]./r-os   42u.(u 3u%)u*
state_dictc                    0 n/ SQnU R                  5        H  u  pVUR                  U5      (       d  M  UR                  US5      nU H  nUR                  US   US   5      nM     US:X  a;  SnUR                  SS5      n[        R
                  " UR                  S   5      US'   O@US:X  a#  S	nUR                  S5      R                  S5      nOUS
:X  a  UR                  S5      nXcU'   M     U$ )N)
)conv1patch_embed.proj)positional_embeddingrC  )ztransformer.resblocks.ra  )ln_preru  )ln_postr  )ln_r  )in_proj_zqkv.r   r  )zmlp.c_fcmlp.fc1)z
mlp.c_projzmlp.fc2r9   r   r.   r  head.weight	head.biasclass_embeddingrq  rC  )itemsr   replacer   r   zerosr   	unsqueeze)rT  r  r  out_dictswapsr   r   sps           rO   _convert_openai_clipri  (  s    
 HE   "||F##IIfb!B		"Q%A'A  ;AAq!A$)KK
$;H[!##AA((+A+AA! #" OrQ   c                    SS K n0 nU R                  SS 5        SU ;   aR  U R                  S5      US'   U R                  S5      U S   S S 2S4   -   US'   U R                  S5      S S 2SS 24   US'   U R                  5        Hc  u  pEUR                  SU5      (       a  XSUR	                  S	S
5      '   M2  UR                  SU5      (       a  XSUR	                  SS5      '   M_  XSU'   Me     U$ )Nr   
mask_tokenregister_tokensrr  rq  rC  r.   z(blocks\.(\d+)\.mlp\.w12\.(?:weight|bias)w12fc1z'blocks\.(\d+)\.mlp\.w3\.(?:weight|bias)w3r  )repoprb  matchrc  )rT  r  rp  rf  r   r   s         rO   _convert_dinov2rs  N  s     HNN<&J& */@ A *{ ;j>UVWYZVZ>[ [ *{ ;AqrE B  "88?CC01QYYue,-XX@!DD/0QYYtU+, # OrQ   c                 d   0 nU R                  5        H  u  p4UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  S	S
5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nXBU'   M     U$ )Nnorm_1rk   norm_2rp   zpreprocessor.patchifier.patch_embed.zpreprocessor.pos_embedrC  ztrunk.r9   zpost_trunk_norm.norm.r^  z	mlp.fc1_gzmlp.fc3z	mlp.fc1_x)rb  rc  )rT  r  rf  r   r   s        rO   _convert_aimv2ry  e  s     H  "IIh(IIh(II0.AII.<IIh#II('2IIi-IIi- # OrQ   c                    SSK nU R                  SU 5      n S H  nU R                  US5        M     / SQn0 nU R                  5        HM  u  p6SU;   a  M  U H  u  pxUR	                  XxU5      nM     US:X  a  USS R                  S5      US	'   MI  XeU'   MO     0 0 pUR                  S
5      nUR                  5        H  u  p6UR                  U5      nU(       d  XiU'   M#  UR                  5       u  pnU
R                  X40 5      nUUU'   [        U5      S:X  d  M_  [        R                  " US   US   US   /SS9U	SU SU 3'   M     U	$ )zH
Turn a BEiT-3 checkpoint into a standard VisionTransformer state-dict.
r   Nr  )zbeit3.text_embed.weightzbeit3.vision_embed.mask_token))zbeit3\.r9   )zvision_embed\.cls_tokenrq  )zvision_embed\.rw  )zembed_positions\.z
pos_embed.)z	encoder\.r9   )zlayers\.ra  )zffn_layernorm\.rx  )zffn\.zmlp.)zself_attn_layer_norm\.znorm1.)zself_attn\.zattn.)zfinal_layer_norm\.znorm2.)inner_attn_lnr  r]  )z\.A\..z.B.zpos_embed.weightr   rC  z1blocks\.(\d+)\.attn\.(q|k|v)_proj\.(weight|bias)$r   r   r   r   r   ra  z
.attn.qkv.)rp  rL   rq  rb  subre  compile	fullmatchgroups
setdefaultr  r   r   )rT  r  rp  r   rulestmpr   oldnewoutbufpatr  r  whichkindstashs                    rO   _convert_beit3r  w  s_    4J Jq$ JE  C  "A:HCs#A "" uq1CF # 2
**I
JC		MM!F88:D{B/eu:?3899sU3Zs4!4C'#j/0  JrQ   adapt_layer_scalec           
          SSK n0 nU R                  SU 5      n U R                  SU 5      n SnSU ;   a  [        X5      n OSU ;   a  [        XSS	9n OS
U ;   a  [        X5      n O[	        S U R                  5        5       5      (       a  [        X5      n OSU ;   a  U S   n SnOSU ;   d  SU ;   ac  SnSU ;   aZ  [        UR                  [        R                  5      (       a1  U S   US'   [        R                  " U S   R                  S   5      US'   OSU ;   a  SnOSU ;   a  [        X5      n U(       aI  U R                  5        VV	s0 s H,  u  pUR!                  U5      (       d  M  U[#        U5      S U	_M.     n nn	U R                  5        GHi  u  pSU;   a  UR$                  R&                  R(                  R                  u  pp[#        U	R                  5      S:  a@  UR$                  R&                  R(                  R                  u  ppU	R+                  U
SX5      n	U	R                  S   U:w  d  U	R                  S   U:w  a  [-        U	X4UUSS9n	OUS:X  an  U	R                  S   UR.                  R                  S   :w  aD  [1        USS 5      (       a  SO[1        US!S5      n[3        U	UR$                  R4                  UUUSS"9n	O*U(       a  S#U;   a  UR7                  S$S%U5      nO	S&U;   a  GMe  XU'   GMl     U$ s  sn	nf )'zIconvert patch embedding weight from manual patchify + linear proj to convr   Nr  rT  r9   zvisual.class_embeddingzmodule.visual.class_embeddingzmodule.visual.)r  rk  c              3   ,   #    U  H
  nS U;   v   M     g7f)zbeit3.Nrh   ).0r   s     rO   	<genexpr>'checkpoint_filter_fn.<locals>.<genexpr>  s     6$5qX]$5s   encoderzmodule.zvisual.trunk.pos_embedz"visual.trunk.blocks.0.norm1.weightzvisual.trunk.zvisual.head.proj.weightr_  r`  zmodule.visual.trunk.pos_embedzmodule.visual.trunk.z#preprocessor.patchifier.proj.weightzpatch_embed.proj.weightr  r   r   Tr/  rC  r.   rD  Fr(  r2  gamma_zgamma_([0-9])z
ls\1.gammar  )rp  rL   ri  rs  anykeysr  rI   rz  rl   r   r   rd  r   ry  rb  r   r  rn  r  r   r   r%   rC  r@  r&   r  r}  )rT  r  r  r  r  rp  rf  r  r   r   OIr  r  r(  s                  rO   checkpoint_filter_fnr    s    H4Jj9JF:-)*<
	(J	6)*DTU
		#$Z7
	6JOO$56	6	6#J6
	j	 	*
	!Z	/3W[e3e $
2z%**bii7X7X&01J&KH]#$)KK
;T0U0[0[\]0^$_H[!	(J	6'	.*	<#J6
5?5E5E5G`5GTQ1<<X^K_(aFoq(5G
`  "$)**//66<<JA!177|a"..33::@@
aIIaQ*wwr{a1772;!#3(F"/'  +!''!*0E0Ea0H"H%,U4De%L%LRYZ_atvwRx&**44"3+#A 8q=':AQA #B OG as   K:$K:urlc                 4    U SSS SSS[         [        SSSS	.UE$ )
Nr5  )r   r3  r3  g?bicubicTrW  rz  
apache-2.0)r  r<  
input_size	pool_sizecrop_pctr  fixed_input_sizer   r  
first_conv
classifierlicense)r   r   )r  rN   s     rO   _cfgr    s:    #" '%(  rQ   z*vit_base_patch16_224.augreg2_in21k_ft_in1kztimm/)	hf_hub_idz)vit_base_patch8_224.augreg2_in21k_ft_in1kz)vit_tiny_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz)r  r  custom_loadz)vit_tiny_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz)r     r        ?)r  r  r  r  r  z*vit_small_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz*vit_small_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz)vit_base_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz)vit_base_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz(vit_base_patch8_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz'vit_base_patch16_224.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth)r  r  z'vit_base_patch16_384.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth)r  r  r  r  z(vit_large_patch32_384.orig_in21k_ft_in1kzphttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pthz!vit_small_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz!vit_small_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch32_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch32_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzzvit_large_patch14_224.untrained)r  zvit_huge_patch14_224.untrainedzvit_giant_patch14_224.untrainedz"vit_gigantic_patch14_224.untrainedzvit_base_patch32_224.orig_in21k)r  r<  zvit_base_patch16_224.orig_in21kz vit_large_patch32_224.orig_in21kz vit_large_patch16_224.orig_in21kzvit_huge_patch14_224.orig_in21kz!vit_tiny_patch16_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npziSU  )r  r  r  r<  z"vit_small_patch32_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz"vit_small_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch32_224.augreg_in21kzohttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz vit_base_patch8_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz"vit_large_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npzzvit_base_patch32_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_32.npz)r  r  r  zvit_base_patch16_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_16.npzzvit_small_patch16_224.dinoz[https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth)r  r  r   r  r<  zvit_small_patch8_224.dinozYhttps://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_pretrain.pthzvit_base_patch16_224.dinozWhttps://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pthzvit_base_patch8_224.dinozUhttps://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pthz vit_small_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pthr  )r     r  )r  r  r  r   r  r<  r  r  zvit_base_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pthz vit_large_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pthz vit_giant_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pthz%vit_small_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pthz$vit_base_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pthz%vit_large_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pthz%vit_giant_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pthzvit_base_patch16_224_miil.in21kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_in21k_miil-887286df.pth)rV   rV   rV   )r  r  r  g      ?r)  i+  )r  r  r   r  r  r  r<  z'vit_base_patch16_224_miil.in21k_ft_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_1k_miil_84_4-2deb18e3.pth)r  r  r   r  r  r  z vit_base_patch16_rpn_224.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_base_patch16_rpn_224-sw-3b07e89d.pthz#vit_medium_patch16_gap_240.sw_in12k)r      r  gffffff?i-.  )r  r  r  r<  z+vit_medium_patch16_gap_256.sw_in12k_ft_in1k)r      r  )r  r  r  z+vit_medium_patch16_gap_384.sw_in12k_ft_in1ksquash)r  r  r  	crop_modez%vit_betwixt_patch16_gap_256.untrained)r  r  z"vit_base_patch16_gap_224.untrainedz/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k)r  r   r  z/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k)r  r   r  r  r  z/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k)r     r  z/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k)r  r   r  r  z/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k)r  r   r  r  r  r  z0vit_large_patch14_clip_224.laion2b_ft_in12k_in1kz0vit_large_patch14_clip_336.laion2b_ft_in12k_in1k)r   P  r  z/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1kz/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1kz.vit_base_patch32_clip_224.openai_ft_in12k_in1kr   z.vit_base_patch32_clip_384.openai_ft_in12k_in1kz.vit_base_patch16_clip_224.openai_ft_in12k_in1kz.vit_base_patch16_clip_384.openai_ft_in12k_in1kz/vit_large_patch14_clip_224.openai_ft_in12k_in1kz/vit_large_patch14_clip_336.openai_ft_in12k_in1kz)vit_base_patch32_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_384.laion2b_ft_in1kz*vit_large_patch14_clip_224.laion2b_ft_in1kz*vit_large_patch14_clip_336.laion2b_ft_in1kz)vit_huge_patch14_clip_224.laion2b_ft_in1kz)vit_huge_patch14_clip_336.laion2b_ft_in1kz(vit_base_patch32_clip_224.openai_ft_in1kz(vit_base_patch16_clip_224.openai_ft_in1kz(vit_base_patch16_clip_384.openai_ft_in1kz)vit_large_patch14_clip_224.openai_ft_in1kz*vit_base_patch16_clip_224.laion2b_ft_in12k)r  r   r  r<  z+vit_large_patch14_clip_224.laion2b_ft_in12k)r  r   r  r  r<  z*vit_huge_patch14_clip_224.laion2b_ft_in12kz)vit_base_patch16_clip_224.openai_ft_in12kz*vit_large_patch14_clip_224.openai_ft_in12kz!vit_base_patch32_clip_224.laion2b   z!vit_base_patch16_clip_224.laion2bz"vit_large_patch14_clip_224.laion2br6  z!vit_huge_patch14_clip_224.laion2b   z"vit_giant_patch14_clip_224.laion2bz%vit_gigantic_patch14_clip_224.laion2b   z'vit_base_patch32_clip_224.laion400m_e32mit)zDnatively QuickGELU, use quickgelu model variant for original results)r  r  notesr   r  r<  z'vit_base_patch16_clip_224.laion400m_e32)r  r  r   r  r  r<  z,vit_base_patch16_plus_clip_240.laion400m_e32  )r  r  r   r  r  r  r<  z(vit_large_patch14_clip_224.laion400m_e32z$vit_base_patch32_clip_224.datacompxlz$vit_base_patch32_clip_256.datacompxl)r  r   r  r  r  r<  z$vit_base_patch16_clip_224.datacompxlz%vit_large_patch14_clip_224.datacompxlzvit_base_patch16_clip_224.dfn2bz
apple-asclz%vit_large_patch14_clip_224.dfn2b_s39bz vit_large_patch14_clip_224.dfn2b)r  r  r  r   r  r  r<  zvit_huge_patch14_clip_224.dfn5bzvit_huge_patch14_clip_378.dfn5b)r   z  r  )r  r   r  r  r  r  r  r<  z-vit_huge_patch14_clip_224.metaclip2_worldwidezcc-by-nc-4.0z-vit_huge_patch14_clip_378.metaclip2_worldwide)r  r  r   r  r  r  r  r<  z1vit_gigantic_patch14_clip_224.metaclip2_worldwidez1vit_gigantic_patch14_clip_378.metaclip2_worldwidez(vit_base_patch32_clip_224.metaclip_2pt5bz(vit_base_patch16_clip_224.metaclip_2pt5bz)vit_large_patch14_clip_224.metaclip_2pt5bz(vit_huge_patch14_clip_224.metaclip_2pt5bz-vit_huge_patch14_clip_224.metaclip_altogetherz,vit_gigantic_patch14_clip_224.metaclip_2pt5bz'vit_base_patch32_clip_224.metaclip_400mz'vit_base_patch16_clip_224.metaclip_400mz(vit_large_patch14_clip_224.metaclip_400mz vit_base_patch32_clip_224.openai)r  r  r   r  r<  z vit_base_patch16_clip_224.openaiz!vit_large_patch14_clip_224.openai)r  r  r   r  r  r<  z!vit_large_patch14_clip_336.openai)r  r  r   r  r  r  r<  z/vit_large_patch14_clip_224.apple_mclip2_dfndr2bz
apple-amlr)r  r<  r   r  r  r  z#vit_base_patch32_plus_256.untrained)r  r  r  z#vit_base_patch16_plus_240.untrainedz$vit_small_patch16_36x1_224.untrainedz$vit_small_patch16_18x2_224.untrainedz#vit_base_patch16_18x2_224.untrainedz)eva_large_patch14_196.in22k_ft_in22k_in1k)r      r  )r  r  r   r  r  r  z)eva_large_patch14_336.in22k_ft_in22k_in1k)r  r  r   r  r  r  r  z#eva_large_patch14_196.in22k_ft_in1kz#eva_large_patch14_336.in22k_ft_in1kzflexivit_small.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k.npz)r  r  r  r  r  zflexivit_small.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_600ep.npzzflexivit_small.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_300ep.npzzflexivit_base.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k.npzzflexivit_base.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_600ep.npzzflexivit_base.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_300ep.npzzflexivit_base.1000ep_in21kzMhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_1000ep.npz)r  r  r  r  r  r<  zflexivit_base.300ep_in21kzLhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_300ep.npzzflexivit_large.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k.npzzflexivit_large.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_600ep.npzzflexivit_large.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_300ep.npzzflexivit_base.patch16_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b16_i21k_300ep.npzzflexivit_base.patch30_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b30_i21k_300ep.npzz!vit_base_patch16_xp_224.untrainedz"vit_large_patch14_xp_224.untrainedz!vit_huge_patch14_xp_224.untrainedzvit_base_patch16_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth)r  r  r  r   r  r<  zvit_large_patch16_224.maezFhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pthzvit_huge_patch14_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pthz#vit_huge_patch14_gap_224.in1k_ijepaz?https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.14-300e.pth.tar)r  r  r   r  r<  z$vit_huge_patch14_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.h.14-900e.pth.tarz#vit_huge_patch16_gap_448.in1k_ijepazEhttps://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar)r  r  r  r  r   r  r<  z%vit_giant_patch16_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tarz$vit_base_patch32_siglip_256.v2_webli)r  r  r<  z$vit_base_patch16_siglip_224.v2_webliz!vit_base_patch16_siglip_224.webliz$vit_base_patch16_siglip_256.v2_webliz!vit_base_patch16_siglip_256.webliz&vit_base_patch16_siglip_256.webli_i18nz$vit_base_patch16_siglip_384.v2_webliz!vit_base_patch16_siglip_384.webliz$vit_base_patch16_siglip_512.v2_webli)r   r  r  z!vit_base_patch16_siglip_512.webliz%vit_large_patch16_siglip_256.v2_webliz"vit_large_patch16_siglip_256.webliz%vit_large_patch16_siglip_384.v2_webliz"vit_large_patch16_siglip_384.webliz%vit_large_patch16_siglip_512.v2_webliz&vit_so400m_patch14_siglip_224.v2_webliz#vit_so400m_patch14_siglip_224.webliz&vit_so400m_patch14_siglip_378.v2_webliz#vit_so400m_patch14_siglip_378.webliz#vit_so400m_patch14_siglip_384.webliz&vit_so400m_patch16_siglip_256.v2_webliz(vit_so400m_patch16_siglip_256.webli_i18nz&vit_so400m_patch16_siglip_384.v2_webliz&vit_so400m_patch16_siglip_512.v2_webliz(vit_giantopt_patch16_siglip_256.v2_webliz(vit_giantopt_patch16_siglip_384.v2_webliz(vit_base_patch32_siglip_gap_256.v2_webliz(vit_base_patch16_siglip_gap_224.v2_webliz%vit_base_patch16_siglip_gap_224.webliz(vit_base_patch16_siglip_gap_256.v2_webliz%vit_base_patch16_siglip_gap_256.webliz*vit_base_patch16_siglip_gap_256.webli_i18nz(vit_base_patch16_siglip_gap_384.v2_webliz%vit_base_patch16_siglip_gap_384.webliz(vit_base_patch16_siglip_gap_512.v2_webliz%vit_base_patch16_siglip_gap_512.webliz)vit_large_patch16_siglip_gap_256.v2_webliz&vit_large_patch16_siglip_gap_256.webliz)vit_large_patch16_siglip_gap_384.v2_webliz&vit_large_patch16_siglip_gap_384.webliz)vit_large_patch16_siglip_gap_512.v2_webliz*vit_so400m_patch14_siglip_gap_224.v2_webliz'vit_so400m_patch14_siglip_gap_224.webliz*vit_so400m_patch14_siglip_gap_224.pali_mixz)vit_so400m_patch14_siglip_gap_224.pali_ptz-vit_so400m_patch14_siglip_gap_224.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_224.pali2_10b_ptz*vit_so400m_patch14_siglip_gap_378.v2_webliz'vit_so400m_patch14_siglip_gap_378.webliz'vit_so400m_patch14_siglip_gap_384.webliz*vit_so400m_patch14_siglip_gap_448.pali_mixz)vit_so400m_patch14_siglip_gap_448.pali_ptz2vit_so400m_patch14_siglip_gap_448.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_448.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_448.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_448.pali2_10b_ptz0vit_so400m_patch14_siglip_gap_448.pali2_3b_docciz1vit_so400m_patch14_siglip_gap_448.pali2_10b_docciz)vit_so400m_patch14_siglip_gap_896.pali_pt)r     r  z2vit_so400m_patch14_siglip_gap_896.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_896.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_896.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_896.pali2_10b_ptz*vit_so400m_patch16_siglip_gap_256.v2_webliz,vit_so400m_patch16_siglip_gap_256.webli_i18nz*vit_so400m_patch16_siglip_gap_384.v2_webliz*vit_so400m_patch16_siglip_gap_512.v2_webliz,vit_giantopt_patch16_siglip_gap_256.v2_webliz,vit_giantopt_patch16_siglip_gap_384.v2_webliz+vit_so400m_patch14_siglip_378.webli_ft_in1kz/vit_so400m_patch14_siglip_gap_378.webli_ft_in1kz,vit_xsmall_patch16_clip_224.tinyclip_yfcc15m)r  r  r   r  r<  z.vit_medium_patch32_clip_224.tinyclip_laion400mz,vit_medium_patch16_clip_224.tinyclip_yfcc15mz/vit_betwixt_patch32_clip_224.tinyclip_laion400mz%vit_wee_patch16_reg1_gap_256.sbb_in1kz/vit_dwee_patch16_reg1_gap_256.sbb_nadamuon_in1kz&vit_dwee_patch16_reg1_gap_256.sbb_in1kz&vit_pwee_patch16_reg1_gap_256.sbb_in1kz0vit_dpwee_patch16_reg1_gap_256.sbb_nadamuon_in1kz'vit_dpwee_patch16_reg1_gap_256.sbb_in1kz1vit_little_patch16_reg1_gap_256.sbb_in12k_ft_in1kz)vit_little_patch16_reg1_gap_256.sbb_in12k)r  r<  r  r  z(vit_little_patch16_reg4_gap_256.sbb_in1kz2vit_dlittle_patch16_reg1_gap_256.sbb_nadamuon_in1kz(vit_medium_patch16_reg1_gap_256.sbb_in1kz1vit_medium_patch16_reg4_gap_256.sbb_in12k_ft_in1kz(vit_medium_patch16_reg4_gap_256.sbb_in1kz)vit_medium_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1kz0vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_mediumd_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz)vit_betwixt_patch16_reg1_gap_256.sbb_in1kz8vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1kz)vit_betwixt_patch16_reg4_gap_256.sbb_in1kz0vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_betwixt_patch16_reg4_gap_256.sbb_in12kz8vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz'vit_base_patch16_reg4_gap_256.untrained)r  z6vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k_ft_in1kz.vit_so150m_patch16_reg4_gap_256.sbb_e250_in12kz6vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1kz)vit_so150m_patch16_reg4_map_256.untrainedz7vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1kz/vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12kz7vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1kz7vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1kz$vit_intern300m_patch14_448.ogvl_distz$vit_intern300m_patch14_448.ogvl_2pt5)r  r   r  r  r  r<  z aimv2_large_patch14_224.apple_pt)r  r   r  r  r  r<  z%aimv2_large_patch14_224.apple_pt_distzaimv2_huge_patch14_224.apple_ptzaimv2_1b_patch14_224.apple_ptzaimv2_3b_patch14_224.apple_ptz aimv2_large_patch14_336.apple_pt)r  r   r  r  r  r  r<  z%aimv2_large_patch14_336.apple_pt_distzaimv2_huge_patch14_336.apple_ptzaimv2_1b_patch14_336.apple_ptzaimv2_3b_patch14_336.apple_ptz aimv2_large_patch14_448.apple_ptzaimv2_huge_patch14_448.apple_ptzaimv2_1b_patch14_448.apple_ptzaimv2_3b_patch14_448.apple_ptztest_vit.r160_in1k)r      r  )r  r   r  r  )r  r  r   r  r  )ztest_vit2.r160_in1kztest_vit3.r160_in1kztest_vit4.r160_in1kz$beit3_base_patch16_224.in22k_ft_in1kz-beit3_base_patch16_224.indomain_in22k_ft_in1kz%beit3_large_patch16_224.in22k_ft_in1kz.beit3_large_patch16_224.indomain_in22k_ft_in1kz!beit3_giant_patch14_224.untrainedz!beit3_giant_patch14_336.untrainedzbeit3_base_patch16_224.ptz"beit3_base_patch16_224.indomain_ptzbeit3_large_patch16_224.ptz#beit3_large_patch16_224.indomain_ptr  	quickgelur  _clip__clip_quickgelu_TIMM_USE_NAFLEXVITfalsetruevariant
pretrained
use_naflex	NaFlexVitc           
      8   Uc  [         nU(       a  SSKJn  U" X40 UD6$ UR                  SS5      nSU ;   a  [	        [
        SSS9nO[
        nUR                  S	S
5      nSU ;   a  UR                  SS 5      S:w  a  Sn[        [        U U4UU[        USS9S.UD6$ )Nr.   )_create_naflexvit_from_classicout_indicesr   flexir)  F)r  r  pretrained_strictTsiglipr=  r?  getter)r  feature_cls)pretrained_filter_fnr  feature_cfg)
_USE_NAFLEX_DEFAULT	naflexvitr  rq  r   r  rL   r/   r8   rm  )r  r  r  rN   r  r  
_filter_fnstricts           rO   _create_vision_transformerr    s     (
=-gLVLL**]A.K' 1W\]
)
 ZZ+T2F7vzz->%G ( [hG  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Tiny (Vit-Ti/16)
    r4     r7  r   r:  rA  rF   r>   r  )vit_tiny_patch16_224rm  r  r  rN   
model_argsr  s       rO   r  r    8     s"JJ&s*sX\]gXrkqXrsELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z$ViT-Tiny (Vit-Ti/16) @ 384x384.
    r4  r  r7  r   r  r  )vit_tiny_patch16_384r  r  s       rO   r  r    r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Small (ViT-S/32)
        r  r7     r  r  )vit_small_patch32_224r  r  s       rO   r  r    8     s"JJ&t:tY]^hYslrYstELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )z%ViT-Small (ViT-S/32) at 384x384.
    r  r  r7  r  r  r  )vit_small_patch32_384r  r  s       rO   r  r    r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )ViT-Small (ViT-S/16)
    r4  r  r7  r  r  r  )vit_small_patch16_224r  r  s       rO   r  r  (  r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )r  r4  r  r7  r  r  r  )vit_small_patch16_384r  r  s       rO   r  r  1  r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Small (ViT-S/8)
       r  r7  r  r  r  )vit_small_patch8_224r  r  s       rO   r  r  :  s8     cqIJ&s*sX\]gXrkqXrsELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer.
r  r6  r7  r  r  )vit_base_patch32_224r  r  s       rO   r  r  C  8    
 s"KJ&s*sX\]gXrkqXrsELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r  r6  r7  r  r  )vit_base_patch32_384r  r  s       rO   r  r  M  r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r4  r6  r7  r  r  )vit_base_patch16_224r  r  s       rO   r  r  W  r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r4  r6  r7  r  r  )vit_base_patch16_384r  r  s       rO   r  r  a  r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r  r6  r7  r  r  )vit_base_patch8_224r  r  s       rO   r  r  k  s8    
 crJJ&rrW[\fWqjpWqrELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )znViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
    r  r     r4  r  r  )vit_large_patch32_224r  r  s       rO   r  r  u  8     t2LJ&t:tY]^hYslrYstELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r  r  r  r4  r  r  )vit_large_patch32_384r  r  s       rO   r  r  ~  8    
 t2LJ&t:tY]^hYslrYstELrQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
r4  r  r  r  r  )vit_large_patch16_224r  r  s       rO   r   r     r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
r4  r  r  r  r  )vit_large_patch16_384r  r  s       rO   r  r    r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zViT-Large model (ViT-L/14)
       r  r  r4  r  r  )vit_large_patch14_224r  r  s       rO   r  r    r  rQ   c           	      L    [        SSSSS9n[        SSU 0[        U40 UD6D6nU$ )zVViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
    r  r  r  r4  r  r  )vit_huge_patch14_224r  r  s       rO   r  r    s8     t2LJ&s*sX\]gXrkqXrsELrQ   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zpViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    tE]t@(   r4  r:  rA  rW   rF   r>   r  )vit_giant_patch14_224r  r  s       rO   r  r    s;     tuBZ\]J&t:tY]^hYslrYstELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zpViT-Gigantic (big-G) model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    ;;@0   r4  r  r  )vit_gigantic_patch14_224r  r  s       rO   r  r    sG     tuBZ\]J&"Y/9Y=A*=WPV=WYELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )zViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
r4  r6  r7  F)r:  rA  rF   r>   r?   r  )vit_base_patch16_224_miilr  r  s       rO   r  r    sG    
 s"UZ[J&#Z0:Z>B:>XQW>XZELrQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 240x240
    r4  r  r7  r  Fr,  r  	r:  rA  rF   r>   rB  r=  r?   rZ   rH  r  )vit_medium_patch16_gap_240r  r  s       rO   r  r    U     B!EtULJ '$[1;[?CJ?YRX?Y[ELrQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 256x256
    r4  r  r7  r  Fr,  r  r  r  )vit_medium_patch16_gap_256r  r  s       rO   r  r    r  rQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zAViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 384x384
    r4  r  r7  r  Fr,  r  r  r  )vit_medium_patch16_gap_384r  r  s       rO   r  r    r  rQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )zBViT-Betwixt (ViT-b/16) w/o class token, w/ avg-pool @ 256x256
    r4  r  r7  
   Fr,  r  r  r  )vit_betwixt_patch16_gap_256r  r  s       rO   r  r    sU     B"%EtULJ '%\2<\@DZ@ZSY@Z\ELrQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	z?ViT-Base (ViT-B/16) w/o class token, w/ avg-pool @ 224x224
    r4  r6  r7  Fr,  r:  rA  rF   r>   rB  r=  rH  r  )vit_base_patch16_gap_224r  r  s       rO   r"  r"    sP     B"%]blqsJ&"Y/9Y=A*=WPV=WYELrQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
z:ViT-Huge model (ViT-H/14) w/ no class token, avg pool
    r  r  r  r4  Fr,  r!  r  )vit_huge_patch14_gap_224r  r  s       rO   r$  r$    P     R25^cmrtJ&"Y/9Y=A*=WPV=WYELrQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zDViT-Huge model (ViT-H/16) w/ no class token, avg pool @ 448x448
    r4  r  r  Fr,  r!  r  )vit_huge_patch16_gap_448r  r  s       rO   r'  r'    r%  rQ   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
zGViT-Giant (little-gg) model (ViT-g/16) w/ no class token, avg pool
    r4  r	  r  r
  Fr,  r:  rA  rF   r>   rW   rB  r=  rH  r  )vit_giant_patch16_gap_224r  r  s       rO   r*  r*    sR     R2ue=J '#Z0:Z>B:>XQW>XZELrQ   c                 h    [        SSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r  r  Tr   r   rA  rF   r>   rF  rE   r  )vit_xsmall_patch16_clip_224rm  r   r   r  r  s       rO   r.  r.  )  P     2TV]^gmqVrsJ&%\2<\@DZ@ZSY@Z\ELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r7  r  Tr   r,  r:  rA  rF   r>   rF  rE   r  )vit_medium_patch32_clip_224r/  r  s       rO   r3  r3  2  sV     B!dW^_hnrWsuJ&%\2<\@DZ@ZSY@Z\ELrQ   c                 h    [        SSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r7  r  Tr   r,  r-  r  )vit_medium_patch16_clip_224r/  r  s       rO   r5  r5  <  r0  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r7  r  Tr   r,  r2  r  )vit_betwixt_patch32_clip_224r/  r  s       rO   r7  r7  E  sV     B"tX_`iosXtvJ&&]3=]AEjA[TZA[]ELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
(ViT-B/32 CLIP image tower @ 224x224
    r  r6  r7  Tr   r,  r2  r  )vit_base_patch32_clip_224r/  r  s       rO   r:  r:  O  V     B"tX_`iosXtvJ&#Z0:Z>B:>XQW>XZELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 256x256
    r  r6  r7  Tr   r,  r2  r  )vit_base_patch32_clip_256r/  r  s       rO   r=  r=  Z  r;  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 384x384
    r  r6  r7  Tr   r,  r2  r  )vit_base_patch32_clip_384r/  r  s       rO   r?  r?  e  r;  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/32 CLIP image tower @ 448x448
    r  r6  r7  Tr   r,  r2  r  )vit_base_patch32_clip_448r/  r  s       rO   rA  rA  p  r;  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
zViT-B/16 CLIP image tower
    r4  r6  r7  Tr   r,  r2  r  )vit_base_patch16_clip_224r/  r  s       rO   rC  rC  {  r;  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S	SU 0[        U40 UD6D6nU$ )
z(ViT-B/16 CLIP image tower @ 384x384
    r4  r6  r7  Tr   r,  r2  r  )vit_base_patch16_clip_384r/  r  s       rO   rE  rE    r;  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z4ViT-Base (ViT-B/16+) CLIP image tower @ 240x240
    r4  r  r7  r  Tr   r,  r2  r  )vit_base_patch16_plus_clip_240r/  r  s       rO   rG  rG    sV     B"tX_`iosXtvJ&(_5?_CG
C]V\C]_ELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z0ViT-Large model (ViT-L/14) CLIP image tower
    r  r  r  r4  Tr   r,  r2  r  )vit_large_patch14_clip_224r/  r  s       rO   rI  rI    V     R2Y`ajptYuwJ&$[1;[?CJ?YRX?Y[ELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z:ViT-Large model (ViT-L/14) CLIP image tower @ 336x336
    r  r  r  r4  Tr   r,  r2  r  )vit_large_patch14_clip_336r/  r  s       rO   rL  rL    rJ  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z0ViT-Huge model (ViT-H/14) CLIP image tower.
    r  r  r  r4  Tr   r,  r2  r  )vit_huge_patch14_clip_224r/  r  s       rO   rN  rN    V     R2Y`ajptYuwJ&#Z0:Z>B:>XQW>XZELrQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z9ViT-Huge model (ViT-H/14) CLIP image tower @ 336x336
    r  r  r  r4  Tr   r,  r2  r  )vit_huge_patch14_clip_336r/  r  s       rO   rQ  rQ    rO  rQ   c                 j    [        SSSSS[        [        SS9S9n[         S
S	U 0[        U40 UD6D6nU$ )z9ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378
    r  r  r  r4  Tr   r,  r2  r  )vit_huge_patch14_clip_378r/  r  s       rO   rS  rS    rO  rQ   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )zViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
Pretrained weights from CLIP image tower.
r  r	  r
  r  r4  Tr   r,  r:  rA  rW   rF   r>   rF  rE   r  )vit_giant_patch14_clip_224r/  r  s       rO   rV  rV    sX    
 bBY]9$/J '$[1;[?CJ?YRX?Y[ELrQ   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )ViT-bigG model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
Pretrained weights from CLIP image tower.
r  r  r  r  r4  Tr   r,  rU  r  )vit_gigantic_patch14_clip_224r/  r  s       rO   rY  rY    X    
 bBY]9$/J ''^4>^BFzB\U[B\^ELrQ   c                 l    [        SSSSSS[        [        SS9S	9n[         SS
U 0[        U40 UD6D6nU$ )rX  r  r  r  r  r4  Tr   r,  rU  r  )vit_gigantic_patch14_clip_378r/  r  s       rO   r\  r\    rZ  rQ   c                 l    [        SSSSS[        [        SS9SS9n[         S
S	U 0[        U40 UD6D6nU$ )r9  r  r6  r7  Tr   r,  
quick_gelur:  rA  rF   r>   rF  rE   r\   r  )#vit_base_patch32_clip_quickgelu_224r/  r  s       rO   r`  r`    W     B"t9$/<J '-d:DdHLZHb[aHbdELrQ   c                 l    [        SSSSS[        [        SS9SS9n[         S
S	U 0[        U40 UD6D6nU$ )z/ViT-B/16 CLIP image tower w/ QuickGELU act
    r4  r6  r7  Tr   r,  r^  r_  r  )#vit_base_patch16_clip_quickgelu_224r/  r  s       rO   rc  rc  
  ra  rQ   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zAViT-Large model (ViT-L/14) CLIP image tower w/ QuickGELU act
    r  r  r  r4  Tr   r,  r^  r_  r  )$vit_large_patch14_clip_quickgelu_224r/  r  s       rO   re  re    W     R29$/<J '.e;EeIMjIc\bIceELrQ   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zKViT-Large model (ViT-L/14) CLIP image tower @ 336x336 w/ QuickGELU act
    r  r  r  r4  Tr   r,  r^  r_  r  )$vit_large_patch14_clip_quickgelu_336r/  r  s       rO   rh  rh  $  rf  rQ   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zAViT-Huge model (ViT-H/14) CLIP image tower w/ QuickGELU act.
    r  r  r  r4  Tr   r,  r^  r_  r  )#vit_huge_patch14_clip_quickgelu_224r/  r  s       rO   rj  rj  1  W     R29$/<J '-d:DdHLZHb[aHbdELrQ   c                 l    [        SSSSS[        [        SS9SS	9n[         SS
U 0[        U40 UD6D6nU$ )zJViT-Huge model (ViT-H/14) CLIP image tower @ 378x378 w/ QuickGELU act
    r  r  r  r4  Tr   r,  r^  r_  r  )#vit_huge_patch14_clip_quickgelu_378r/  r  s       rO   rm  rm  >  rk  rQ   c                 n    [        SSSSSS[        [        SS9S	S
9n[         SSU 0[        U40 UD6D6nU$ )z/ViT-bigG model (ViT-G/14) w/ QuickGELU act
    r  r  r  r  r4  Tr   r,  r^  )r:  rA  rW   rF   r>   rF  rE   r\   r  )'vit_gigantic_patch14_clip_quickgelu_224r/  r  s       rO   ro  ro  K  s[     bBY]9$/<J '1h>HhLPQ[Lf_eLfhELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base (ViT-B/32+)
    r  r  r7  r  r   r:  rA  rF   r>   rZ   r  )vit_base_patch32_plus_256r  r  s       rO   rr  rr  Z  G     s"X\]J&#Z0:Z>B:>XQW>XZELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base (ViT-B/16+)
    r4  r  r7  r  r   rq  r  )vit_base_patch16_plus_240r  r  s       rO   ru  ru  d  rs  rQ   c                 `    [        SSSSSSS[        SS9	n[         S	SU 0[        U40 UD6D6nU$ )
z.ViT-Base (ViT-B/16) w/ residual post-norm
    r4  r6  r7  Fr   r,  )	r:  rA  rF   r>   r?   rZ   rB  rZ  r=  r  )vit_base_patch16_rpn_224)rm  r   r  r  s       rO   rw  rw  n  sV     B"uZ^LeEJ '"Y/9Y=A*=WPV=WYELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	a  ViT-Base w/ LayerScale + 36 x 1 (36 block serial) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
r4  r  $   r  r   rq  r  )vit_small_patch16_36x1_224r  r  s       rO   rz  rz  z  sG     s"W[\J&$[1;[?CJ?YRX?Y[ELrQ   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	a  ViT-Small w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
r4  r     r  r   r:  rA  rF   r>   rZ   rZ  r  )vit_small_patch16_18x2_224rm  r  r  r  s       rO   r~  r~    sM     B!XkmJ&$[1;[?CJ?YRX?Y[ELrQ   c           	      Z    [        SSSSS[        S9n[         SSU 0[        U40 UD6D6nU$ )	zViT-Base w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
r4  r6  r|  r7  r   r}  r  )vit_base_patch16_18x2_224r  r  s       rO   r  r    sM    
 B"$YlnJ&#Z0:Z>B:>XQW>XZELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zFEVA-large model https://arxiv.org/abs/2211.07636 /via MAE MIM pretrainr  r  r  r4  r,  r:  rA  rF   r>   r=  r  )eva_large_patch14_196r  r  s       rO   r  r    sF     t2Y^_J&V,6V:>z:TV:TVELrQ   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zEEVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrainr  r  r  r4  r,  r  r  )eva_large_patch14_336r  r  s       rO   r  r    s;     t2Y^_J&t:tY]^hYslrYstELrQ   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )	zFlexiViT-Small
    r4  r  r7  r  Tr:  rA  rF   r>   rD  r  )flexivit_smallr  r  s       rO   r  r    s;     s"Z^_J&mJmRVWaRlekRlmELrQ   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )zFlexiViT-Base
    r4  r6  r7  Tr  r  )flexivit_baser  r  s       rO   r  r    s;     s"[_`J&l:lQUV`QkdjQklELrQ   c           	      N    [        SSSSSS9n[        SSU 0[        U40 UD6D6nU$ )zFlexiViT-Large
    r4  r  r  Tr  r  )flexivit_larger  r  s       rO   r  r    s;     t2\`aJ&mJmRVWaRlekRlmELrQ   c                 j    [        SSSSSS[        [        SSS9
n[         SSU 0[        U40 UD6D6nU$ )	GViT-Large model (ViT-L/14) w/ parallel blocks and qk norm enabled.
    r4  r6  r7  TF
r:  rA  rF   r>   rF  rD  rE   rZ  r?   r@   r  )vit_base_patch16_xp_224rm  r   r   r  r  s       rO   r  r    sX     B"t\`%9ESWJ '!X.8X<@<Vv<VXELrQ   c                 j    [        SSSSSS[        [        SSS9
n[         S	SU 0[        U40 UD6D6nU$ )
r  r  r  r  r4  TFr  r  )vit_large_patch14_xp_224r  r  s       rO   r  r    sY     R2]a%9ESWJ '"Y/9Y=A*=WPV=WYELrQ   c                 j    [        SSSSSS[        [        SSS9
n[         S	SU 0[        U40 UD6D6nU$ )
zFViT-Huge model (ViT-H/14) w/ parallel blocks and qk norm enabled.
    r  r  r  r4  TFr  r  )vit_huge_patch14_xp_224r  r  s       rO   r  r    sX     R2]a%9ESWJ '!X.8X<@<Vv<VXELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-S/14 for DINOv2
    r  r  r7  r  r   rq  r  )vit_small_patch14_dinov2r  r  s       rO   r  r    sG     s"W[\J&"Y/9Y=A*=WPV=WYELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )zViT-B/14 for DINOv2
    r  r6  r7  r   rq  r  )vit_base_patch14_dinov2r  r  s       rO   r  r    sF     s"X\]J&!X.8X<@<Vv<VXELrQ   c           	      P    [        SSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	zViT-L/14 for DINOv2
    r  r  r  r4  r   rq  r  )vit_large_patch14_dinov2r  r  s       rO   r  r    sG     t2Y]^J&"Y/9Y=A*=WPV=WYELrQ   c                 z    [        SSSSSS[        [        R                  S9n[	         S	SU 0[        U40 UD6D6nU$ )
ViT-G/14 for DINOv2
    r     r  r  r   h˹WU@)r:  rA  rF   r>   rZ   rW   r]   r\   r  )vit_giant_patch14_dinov2rm  r   rl   SiLUr  r  s       rO   r  r    sV     R24J '"Y/9Y=A*=WPV=WYELrQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )z'ViT-S/14 for DINOv2 w/ 4 registers
    r  r  r7  r  r   r  Tr:  rA  rF   r>   rZ   rE  rD  r  )vit_small_patch14_reg4_dinov2r  r  s       rO   r  r  !  sP     B!TJ ''^4>^BFzB\U[B\^ELrQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
z'ViT-B/14 for DINOv2 w/ 4 registers
    r  r6  r7  r   r  Tr  r  )vit_base_patch14_reg4_dinov2r  r  s       rO   r  r  .  sP     B"$TJ '&]3=]AEjA[TZA[]ELrQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )z'ViT-L/14 for DINOv2 w/ 4 registers
    r  r  r  r4  r   r  Tr  r  )vit_large_patch14_reg4_dinov2r  r  s       rO   r  r  ;  sP     R24TJ ''^4>^BFzB\U[B\^ELrQ   c                 ~    [        SSSSSS[        [        R                  SSS	9
n[	         SS
U 0[        U40 UD6D6nU$ )r  r  r  r  r  r   r  r  T)
r:  rA  rF   r>   rZ   rW   r]   r\   rE  rD  r  )vit_giant_patch14_reg4_dinov2r  r  s       rO   r  r  H  s\     R24[f"''aPTJ ''^4>^BFzB\U[B\^ELrQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r6  r7  Fr?  	gelu_tanhr:  rA  rF   r>   rB  r=  r\   r  )vit_base_patch32_siglip_256r  r  s       rO   r  r  Y  sO    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r6  r7  Fr?  r:  rA  rF   r>   rB  r=  r  )vit_base_patch16_siglip_224r  r  s       rO   r  r  d  L    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r6  r7  Fr?  r  r  )vit_base_patch16_siglip_256r  r  s       rO   r  r  n  r  rQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r6  r7  Fr?  r  r  )vit_base_patch16_siglip_384r  r  s       rO   r  r  x  r  rQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r6  r7  Fr?  r  r  )vit_base_patch16_siglip_512r  r  s       rO   r  r    r  rQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r  r  Fr?  r  r  )vit_large_patch16_siglip_256r  r  s       rO   r  r    L    R25^cJ '&]3=]AEjA[TZA[]ELrQ   c           	      R    [        SSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	Nr4  r  r  Fr?  r  r  )vit_large_patch16_siglip_384r  r  s       rO   r  r    r  rQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr?  r  r  r  )vit_large_patch16_siglip_512r  r  s       rO   r  r    sO    R25^cJ '&]3=]AEjA[TZA[]ELrQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr       r4  爅ZӼ@Fr?  r:  rA  rF   r>   rW   rB  r=  r  )vit_so400m_patch14_siglip_224r  r  s       rO   r  r    O    R2]bpuJ ''^4>^BFzB\U[B\^ELrQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r4  r  Fr?  r  r  )vit_so400m_patch14_siglip_378r  r  s       rO   r  r    sQ     R2]bpuJ ''^4>^BFzB\U[B\^ELrQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r4  r  Fr?  r  r  )vit_so400m_patch14_siglip_384r  r  s       rO   r  r    r  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r  r  r  Fr?  r  r:  rA  rF   r>   rW   rB  r=  r\   r  )vit_so400m_patch16_siglip_256r  r  s       rO   r  r    R    R2]bpuJ ''^4>^BFzB\U[B\^ELrQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r  r  r  Fr?  r  r  r  )vit_so400m_patch16_siglip_384r  r  s       rO   r  r    r  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r  r  r  Fr?  r  r  r  )vit_so400m_patch16_siglip_512r  r  s       rO   r  r    r  rQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr?  r  r  r  )vit_giantopt_patch16_siglip_256r  r  s       rO   r  r    O    R25^cJ ')`6@`DHD^W]D^`ELrQ   c           
      T    [        SSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr?  r  r  r  )vit_giantopt_patch16_siglip_384r  r  s       rO   r  r    r  rQ   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr  r6  r7  Fr,  r  r:  rA  rF   r>   rB  r=  rH  r\   r  )vit_base_patch32_siglip_gap_256r  r  s       rO   r  r    sR    B"%]blqJ ')`6@`DHD^W]D^`ELrQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	]A SigLIP variant of ViT with global average pooling (GAP) instead of attention pooling (MAP).r4  r6  r7  Fr,  r!  r  )vit_base_patch16_siglip_gap_224r  r  s       rO   r  r    Q     B"%]blqJ ')`6@`DHD^W]D^`ELrQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r4  r6  r7  Fr,  r!  r  )vit_base_patch16_siglip_gap_256r  r  s       rO   r  r    r  rQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r4  r6  r7  Fr,  r!  r  )vit_base_patch16_siglip_gap_384r  r  s       rO   r  r  "  r  rQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r4  r6  r7  Fr,  r!  r  )vit_base_patch16_siglip_gap_512r  r  s       rO   r  r  -  r  rQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r4  r  r  Fr,  r!  r  ) vit_large_patch16_siglip_gap_256r  r  s       rO   r  r  8  Q     R25^cmrJ '*a7AaEI*E_X^E_aELrQ   c           
      T    [        SSSSSSSS9n[         SSU 0[        U40 UD6D6nU$ )	r  r4  r  r  Fr,  r!  r  ) vit_large_patch16_siglip_gap_384r  r  s       rO   r  r  C  r  rQ   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr,  r  r  r  ) vit_large_patch16_siglip_gap_512r  r  s       rO   r  r  N  sP    R255KJ '*a7AaEI*E_X^E_aELrQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r4  r  Fr,  r)  r  )!vit_so400m_patch14_siglip_gap_224r  r  s       rO   r  r  Y  R     R2ueJ '+b8BbFJ:F`Y_F`bELrQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r4  r  Fr,  r)  r  )!vit_so400m_patch14_siglip_gap_378r  r  s       rO   r  r  e  r  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r4  r  Fr,  r)  r  )!vit_so400m_patch14_siglip_gap_384r  r  s       rO   r  r  q  r  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r4  r  Fr,  r)  r  )!vit_so400m_patch14_siglip_gap_448r  r  s       rO   r  r  }  r  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )r  r  r  r  r4  r  Fr,  r)  r  )!vit_so400m_patch14_siglip_gap_896r  r  s       rO   r  r    r  rQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )r  r4  r  r  r  Fr,  r  	r:  rA  rF   r>   rW   rB  r=  rH  r\   r  )!vit_so400m_patch16_siglip_gap_256r  r  s       rO   r  r    sT     R2ue{J '+b8BbFJ:F`Y_F`bELrQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r  r  r  Fr,  r  r  r  )!vit_so400m_patch16_siglip_gap_384r  r  s       rO   r  r    S    R2]b5KJ '+b8BbFJ:F`Y_F`bELrQ   c                 X    [        SSSSSSSSSS9	n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r  r  r  Fr,  r  r  r  )!vit_so400m_patch16_siglip_gap_512r  r  s       rO   r   r     r  rQ   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr,  r  r  r  )#vit_giantopt_patch16_siglip_gap_256r  r  s       rO   r  r    P    R255KJ '-d:DdHLZHb[aHbdELrQ   c                 V    [        SSSSSSSSS9n[         S	SU 0[        U40 UD6D6nU$ )
Nr4  r  r  Fr,  r  r  r  )#vit_giantopt_patch16_siglip_gap_384r  r  s       rO   r  r    r  rQ   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r  r      FTr.   r,  
r:  rA  rF   r>   rZ   rW   rB  rD  rE  r=  r  )vit_wee_patch16_reg1_gap_256r  r  s       rO   r	  r	    sU    B!YZ$1%J '&]3=]AEjA[TZA[]ELrQ   c                 \    [        SSSSSSSSS	S
SS9n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r  r   r  FTr.   r,  r;   r:  rA  rF   r>   rZ   rW   rB  rD  rE  r=  r<   r  )vit_dwee_patch16_reg1_gap_256r  r  s       rO   r  r    sX    B!YZ$1%\bJ ''^4>^BFzB\U[B\^ELrQ   c                 d    [        SSSSSSSSSS	[        S
9n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r   r  FTr.   r,  r:  rA  rF   r>   rZ   rW   rB  rD  rE  r=  rZ  r  )vit_pwee_patch16_reg1_gap_256)rm  r   r  r  s       rO   r  r    sX    B!YZ$1%ZnJ ''^4>^BFzB\U[B\^ELrQ   c                 d    [        SSSSSSSSSS	[        S
9n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r   r  FTr.   r,  r  r  )vit_dpwee_patch16_reg1_gap_256)rm  r   r  r  s       rO   r  r    sX    B!YZ$1%ZrJ '(_5?_CG
C]V\C]_ELrQ   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr4  @  r  r  r   ffffff@FTr.   r,  r  r  )vit_little_patch16_reg1_gap_256r  r  s       rO   r  r    U    B!Y\$1%J ')`6@`DHD^W]D^`ELrQ   c                 \    [        SSSSSSSSS	S
SS9n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r  r   r  FTr.   r,  r;   r  r  ) vit_dlittle_patch16_reg1_gap_256r  r  s       rO   r  r    sX    B!Y\$1%\bJ '*a7AaEI*E_X^E_aELrQ   c                 Z    [        SSSSSSSSS	S
S9
n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r  r  r   r  FTr  r,  r  r  )vit_little_patch16_reg4_gap_256r  r  s       rO   r  r    r  rQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r7  r  r   FTr.   r,  	r:  rA  rF   r>   rZ   rB  rD  rE  r=  r  )vit_medium_patch16_reg1_gap_256r  r  s       rO   r  r    R    B!$1%J ')`6@`DHD^W]D^`ELrQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r7  r  r   FTr  r,  r  r  )vit_medium_patch16_reg4_gap_256r  r  s       rO   r   r   %  r  rQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r     r  r   FTr  r,  r  r  ) vit_mediumd_patch16_reg4_gap_256r  r  s       rO   r#  r#  0  R    B!$1%J '*a7AaEI*E_X^E_aELrQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r"  r  r   FTr  r,  r  r  ) vit_mediumd_patch16_reg4_gap_384r  r  s       rO   r&  r&  ;  r$  rQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r7  r  r   FTr.   r,  r  r  ) vit_betwixt_patch16_reg1_gap_256r  r  s       rO   r(  r(  F  R    B"$$1%J '*a7AaEI*E_X^E_aELrQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r7  r  r   FTr  r,  r  r  ) vit_betwixt_patch16_reg4_gap_256r  r  s       rO   r+  r+  Q  r)  rQ   c                 X    [        SSSSSSSSS	S
9	n[         SSU 0[        U40 UD6D6nU$ )Nr4  r  r7  r  r   FTr  r,  r  r  ) vit_betwixt_patch16_reg4_gap_384r  r  s       rO   r-  r-  \  r)  rQ   c                 V    [        SSSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr4  r6  r7  FTr,  r  )r:  rA  rF   r>   rB  rD  r=  rE  r  )vit_base_patch16_reg4_gap_256r  r  s       rO   r/  r/  g  sP    B"%1J ''^4>^BFzB\U[B\^ELrQ   c                 V    [        SSSSSSSSS	9n[         SS
U 0[        U40 UD6D6nU$ )ESO150M (shape optimized, but diff than paper def, optimized for GPU) r4  r  r|  r  ~jt@Fr  r?  )r:  rA  rF   r>   rW   rB  rE  r=  r  )vit_so150m_patch16_reg4_map_256r  r  s       rO   r3  r3  r  sR     B"aUJ ')`6@`DHD^W]D^`ELrQ   c                 X    [        SSSSSSSSSS	9	n[         SS
U 0[        U40 UD6D6nU$ )r1  r4  r  r|  r  r2  Fr  r,  	r:  rA  rF   r>   rW   rB  rE  r=  rH  r  )vit_so150m_patch16_reg4_gap_256r  r  s       rO   r6  r6  ~  T     B"aUEJ ')`6@`DHD^W]D^`ELrQ   c                 X    [        SSSSSSSSSS	9	n[         SS
U 0[        U40 UD6D6nU$ )r1  r4  r  r|  r  r2  Fr  r,  r5  r  )vit_so150m_patch16_reg4_gap_384r  r  s       rO   r9  r9    r7  rQ   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )HSO150M v2 (shape optimized, but diff than paper def, optimized for GPU) r4  @        NN@r   Fr.   r,  
r:  rA  rF   r>   rW   rZ   r?   rB  rE  r=  r  ) vit_so150m2_patch16_reg1_gap_256r  r  s       rO   rA  rA    W     B"[_EaUJ '*a7AaEI*E_X^E_aELrQ   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )r;  r4  r<  r=  r>  r?  r   Fr.   r,  r@  r  ) vit_so150m2_patch16_reg1_gap_384r  r  s       rO   rD  rD    rB  rQ   c                 Z    [        SSSSSSSSSS	S
9
n[         SSU 0[        U40 UD6D6nU$ )r;  r4  r<  r=  r>  r?  r   Fr.   r,  r@  r  ) vit_so150m2_patch16_reg1_gap_448r  r  s       rO   rF  rF    rB  rQ   c           
      T    [        SSSSSSSS9n[         S
S	U 0[        U40 UD6D6nU$ )Nr  r  r  r4  r   FT)r:  rA  rF   r>   rZ   rG  r2  r  )vit_intern300m_patch14_448r  r  s       rO   rH  rH    sN    R2EDJ '$[1;[?CJ?YRX?Y[ELrQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT Large AIM-v2 model
    r  r  r  r  F      @r,  silur   r,  r:  rA  rF   r>   rB  rH  rW   r=  r?   rB   r\   rE   rY  r]   r  )aimv2_large_patch14_224rm  r   r   r   r  r  s       rO   rN  rN    p     R1%Y^EEUV\7-UY@ZflJ
 '!X.8X<@<Vv<VXELrQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT Huge AIM-v2 model
    r  r  r  r7  FAfU@r,  rL  r   r,  rM  r  )aimv2_huge_patch14_224rO  r  s       rO   rT  rT    sp    
 R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )ViT 1B AIM-v2 model
    r     r  r4  FrK  r,  rL  r   r,  rM  r  )aimv2_1b_patch14_224rO  r  s       rO   rX  rX    p     R25Z_EEUV\7-UY@ZflJ
 'U+5U9=j9SF9SUELrQ   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )ViT 3B AIM-v2 model
    r     r  FrS  r,  rL  r   r,  rM  r  )aimv2_3b_patch14_224rO  r  s       rO   r]  r]    p     R25Z_eeuX^7-UY@ZflJ
 'U+5U9=j9SF9SUELrQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rJ  r  r  r  r  FrK  r,  rL  r   r,  rM  r  )aimv2_large_patch14_336rO  r  s       rO   r`  r`    rP  rQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rR  r  r  r  r7  FrS  r,  rL  r   r,  rM  r  )aimv2_huge_patch14_336rO  r  s       rO   rb  rb    p     R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rV  r  rW  r  r4  FrK  r,  rL  r   r,  rM  r  )aimv2_1b_patch14_336rO  r  s       rO   re  re    rY  rQ   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )r[  r  r\  r  FrS  r,  rL  r   r,  rM  r  )aimv2_3b_patch14_336rO  r  s       rO   rg  rg  (  r^  rQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rJ  r  r  r  r  FrK  r,  rL  r   r,  rM  r  )aimv2_large_patch14_448rO  r  s       rO   ri  ri  6  rP  rQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rR  r  r  r  r7  FrS  r,  rL  r   r,  rM  r  )aimv2_huge_patch14_448rO  r  s       rO   rk  rk  D  rc  rQ   c                     [        SSSSSSSSSSS[        [        S	S
9[        [        S	S
9[        S9n[	         SSU 0[        U40 UD6D6nU$ )rV  r  rW  r  r4  FrK  r,  rL  r   r,  rM  r  )aimv2_1b_patch14_448rO  r  s       rO   rm  rm  R  rY  rQ   c                     [        SSSSSSSSSSS[        [        SS	9[        [        SS	9[        S
9n[	         SSU 0[        U40 UD6D6nU$ )r[  r  r\  r  FrS  r,  rL  r   r,  rM  r  )aimv2_3b_patch14_448rO  r  s       rO   ro  ro  `  r^  rQ   c           	      P    [        SSSSSSS9n[        S	SU 0[        U40 UD6D6nU$ )
ViT Test
    r4  @   r  r   r   T)r:  rA  rF   r>   rW   r2  r  )test_vitr  r  s       rO   rs  rs  n  s=     raSTgklJ&gjgDQ[Lf_eLfgELrQ   c                 X    [        SSSSSSSSS	S
S9
n[        SSU 0[        U40 UD6D6nU$ )rq  r4  rr  r  r   r   Fr.   r,  r   T)
r:  rA  rF   r>   rW   rB  rE  r=  rZ   r2  r  )	test_vit2r  r  s       rO   ru  ru  w  sK     1QaU_ceJ 'hzhTR\Mg`fMghELrQ   c                 X    [        SSSSSSSSS	S
S9
n[        SSU 0[        U40 UD6D6nU$ )rq  r4  `   	   r   r   Fr.   r?  Tr   )
r:  rA  rF   r>   rW   rB  rE  r=  rI  rZ   r  )	test_vit3r  r  s       rO   ry  ry    sL     1QaUPTbfhJ 'hzhTR\Mg`fMghELrQ   c                 Z    [        SSSSSSSSSS	S
S9n[        SSU 0[        U40 UD6D6nU$ )rq  r4  rw  rx  r   Fr.   r,  r   Trmsnorm)r:  rA  rF   r>   rW   rB  rE  r=  rZ   r2  rE   r  )	test_vit4r  r  s       rO   r|  r|    sM     1QaU_cJ
 'hzhTR\Mg`fMghELrQ   c                 p    [        SSSSSSSSS[        [        SS9S	9
n[        SS
U 0[        U40 UD6D6nU$ )zlBEiT3 Base model (ViT-Base size) with patch size 16x16.
Remapped to VisionTransformer with scale_norm=True.
r4  r6  r7  r  Tr,  r   r,  
r:  rA  rF   r>   rW   rX   rY   rB  r=  rE   r  )beit3_base_patch16_224r/  r  s       rO   r  r    sS    
 B"TtQV9$/J
 'uJuZ^_iZtmsZtuELrQ   c                 p    [        SSSSSSSSS[        [        SS9S	9
n[        SS
U 0[        U40 UD6D6nU$ )znBEiT3 Large model (ViT-Large size) with patch size 16x16.
Remapped to VisionTransformer with scale_norm=True.
r4  r  r  r  Tr,  r   r,  r~  r  )beit3_large_patch16_224r/  r  s       rO   r  r    sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrQ   c                 p    [        SSSSSSSSS[        [        SS	9S
9
n[        SSU 0[        U40 UD6D6nU$ )z]BEiT3 Giant model with patch size 14x14.
Remapped to VisionTransformer with scale_norm=True.
r  r	  r  r4  8mt@Tr,  r   r,  r~  r  )beit3_giant_patch14_224r/  r  s       rO   r  r    sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrQ   c                 r    [        SSSSSSSSSS[        [        S	S
9S9n[        SSU 0[        U40 UD6D6nU$ )ztBEiT3 Giant model with patch size 14x14 and image size 336x336.
Remapped to VisionTransformer with scale_norm=True.
r  r  r	  r  r4  r  Tr,  r   r,  )r9  r:  rA  rF   r>   rW   rX   rY   rB  r=  rE   r  )beit3_giant_patch14_336r/  r  s       rO   r  r    sV    
 t2W]TtQV9$/J
 'vZv[_`j[unt[uvELrQ   vit_tiny_patch16_224_in21kvit_small_patch32_224_in21kvit_small_patch16_224_in21kvit_base_patch32_224_in21kvit_base_patch16_224_in21kvit_base_patch8_224_in21kvit_large_patch32_224_in21kvit_large_patch16_224_in21kvit_huge_patch14_224_in21kvit_base_patch32_224_samzvit_base_patch32_224.samvit_base_patch16_224_samzvit_base_patch16_224.samvit_small_patch16_224_dinovit_small_patch8_224_dinovit_base_patch16_224_dinovit_base_patch8_224_dinovit_base_patch16_224_miil_in21k!vit_base_patch32_224_clip_laion2b)"vit_large_patch14_224_clip_laion2b!vit_huge_patch14_224_clip_laion2b"vit_giant_patch14_224_clip_laion2b)FFFTrV   rV   Nr   )r+  r.   Fr  )r9   rV   T)rT  rV   T)r.   rh   r  F)r9   F)zvisual.)Fr  Tr  )FNr  (  r   copyloggingr   oscollectionsr   	functoolsr   typingr   r   r   r   r	   r
   r   r   r   r   ImportErrortyping_extensionsr   torch.nnrl   torch.nn.functional
functionalr   	torch.jitr   	timm.datar   r   r   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   _builderr/   	_featuresr0   _manipulater1   r2   r3   r4   	_registryr5   r6   r7   __all__	getLoggerr   _loggerrK   rq   r   r   r   rP   rS   r   r   r   r  r   rJ   r0  r8   r  r  r  r
  r  r  r  r  ri  rs  ry  rm  r  r  r  default_cfgsrb  rL   _quick_gelu_cfgsr  deepcopycrc  environlowerr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r"  r$  r'  r*  r.  r3  r5  r7  r:  r=  r?  rA  rC  rE  rG  rI  rL  rN  rQ  rS  rV  rY  r\  r`  rc  re  rh  rj  rm  ro  rr  ru  rw  rz  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r	  r  r  r  r  r  r  r  r   r#  r&  r(  r+  r-  r/  r3  r6  r9  rA  rD  rF  rH  rN  rT  rX  r]  r`  rb  re  rg  ri  rk  rm  ro  rs  ru  ry  r|  r  r  r  r  )r  r  s   00rO   <module>r     s@  2    	 #  O O O*      
      4 + + R R Y Y
 

H
% 	  04!!! ! 	!
 ! ! ! ! ! T"))_-! ! YY!HVBII VrH299 HV@299 @Fkryy k\V")) Vv !!"&+	<<   $	2O			 O	d"")) "3 "$ "Z^ "(  	"		"" " 	"
 
"@"")) "3 "$ "Z^ "."")) "3 "RV "bf "

Gs 
Gu 
GQU 
Gai 
G  "#"$&LL  c3h	
   \\2 WT* WTS WT# WTcg WTtx WT WTz  #ell*+# # # 
#u||
	#Lell*+  
#u||
.ell*+  
#u||
$9t 9~ #(&Lell*+L L  L 	L
 L 
#u||
L^c T#s(^ "| 1$3|
 02| 0 d2| 0 d]S2B|" 1$ e3#|* 1$ e]S3B+|2 1$ e33|: 1$ e]S3B;|B 0 f2C|J 0 d]S2BK|R 0 e2S|Z 0 e]S2B[|b / d1c|j 1$ e3k|r 1$ e]S3Bs|~ .t}0|D .t} 300E|L /~ 310M|X ( d*Y|` ( d]S*Ba|h ' d)i|p ' d]S)Bq|x ' d)y|@ ' d]S)BA|J &t|K|L %drlM|N &t|O|P )$2,Q|V &t(W|^ &t(_|f ')g|n ')o|v &t(w|@ ({e*-A|H )$|e+-I|P )$|e+-Q|X (}e*-Y|` (|e*-a|h '{e)-i|p )$|e+-q|| $THVZ&}|B $THVZ&C|L !$i"(<!#MM|T  g"(<!"MU|\  e"(<!"M]|d c"(<!!Me|p '\"(<! 3)0q|| &t\"(<! 3(0}|H '\"(<! 3)0I|T '\"(<! 3)0U|d ,Ta"(<! 3.0e|p +Da"(<! 3-0q|| ,Ta"(<! 3.0}|H ,Ta"(<! 3.0I|X &t L|e:ch(jY|` .t N|e:0Wa|l ' L)m|r *4 4U,Ds|x 24 441y|~ 24 484E|D ,T 4.1E|H )$&I|N 6t?84O|T 6t?S]8\U|Z 6t?S]8\[|` 6t?T8Ca|f 6t?(8Dg|n 7$*@39Po|t 7$*@(9Du|| 6t?S8B}|B	 6t?(8DC	|L	 5d?74M	|R	 5d?-87ES	|Z	 5d?T7C[	|`	 5d?-87Ea	|h	 6t?S8Bi	|n	 6t?(8Do	|x	 0?24y	|~	 0?S2B	|D
 0?(2DE
|L
 1$$*@33PM
|R
 1$$*@(3DS
|Z
 0?S2B[
|`
 0?(2Da
|j
 /?14k
|p
 /?14q
|v
 /?(1Dw
|~
 0?S2B
|F 1$?3GG|L 24$*@3\a4cM|R 1$?Se3US|Z 0?2G[|` 1$?Se3Ua|h (?*Ei|n (?Sc*So|t )$$*@3\_+au|z (?Sd*T{|@ )$?Sd+TA|F ,T?Sd.TG|N .tW?	0EO|X .t?Sc0SY|` 3D? 3C	5Aa|j /?Sc1Sk|t +D?Sc-Su|z +D?C-A{|B +D?Sc-SC|H ,T?Sc.SI|P &t?Sc(SQ|X ,T?Sc.SY|` 'W?Sc	)Sa|j &tW?Sd	(Tk|t &t?WD(Bu|L 4TW?Sd	6TM|V 4T? 3(PT	6VW|` 8?Sd:Ta|h 8? 3(PT	:Vi|t /W?Sc	1Su|~ /W?Sc	1S|H 0W?Sc	2SI|R /W?Sd	1TS|\ 4T?Sd6T]|d 3DW?Sd	5Te|n .tW?Sc	0So|x .tW?Sc	0Sy|B /W?Sc	1SC|N 'W?)EO|V 'W?)EW|^ (W?Sc*S_|f (W?C	*Ag|r 6t?S	8s|B *4B=[_+`C|D *4B=[_+`E|F +DRLG|H +DRLI|J *4B<K|R 05? 3	20S|\ 05? 3(	2D]|f *45? 3	,0g|p *45? 3(	,Dq|| !$Sae 4#1}|D  Ygk 4"1E|L  Ygk 4"1M|V  Sae 4"1W|^ Ygk 4!1_|f Ygk 4!1g|n !$[im 4U#Do|v  Zhl 4U"Dw|@ !$Sae 4#1A|H  Ygk 4"1I|P  Ygk 4"1Q|Z "4Wei 4U$D[|b "4Wei 4U$Dc|l ("m|n )$2,o|p ("q|t S"(<!	!Mu|~  T"(<!	"M|H S"(<!	!MI|T *4M"(<!	,MU|^ +DN"(<!	-M_|h *4S 3"(<!,Mi|t ,TN"(<!	.Mu|@ +D -A|H +D-I|N (*O|T +D -U|\ ( *]|d -d /e|l +D -m|t ( *u|| +D -}|D ( *E|L ,T .M|T )$ +U|\ ,T .]|d )$ +e|l ,T .m|t -d/u|z *4,{|@ -d /A|H *4 ,I|P *4 ,Q|X -d /Y|` / 1a|h -d /i|p -d /q|x / 1y|@ / 1A|J / 1K|R /1S|X ,T.Y|^ / 1_|f ,T .g|n 1$ 3o|v / 1w|~ ,T .|F / 1G|N ,T .O|V 0 2W|^ -d /_|f 0 2g|n -d /o|v 0 2w|~ 1$3|D .t0E|J 1$3K|P 02Q|V 4T6W|\ 5d7]|l 1$ 3m|t .t 30u|| .t 30}|D 1$ 33E|L 0 32M|T 9$ 3;U|\ 4T 36]|d 4T 36e|l 5d 37m|@ 7 39A|H 8 3:I|P 0 32Q|X 9$ 3;Y|` 4T 36a|h 4T 36i|p 5d 37q|D 1$ 3E|L 3D 5M|T 1$ 3U|\ 1$ 3]|d 3D 5e|l 3D 5m|v 24 3(4w|~ 6t 3(8|H 3D?5EI|P 5d?7EQ|X 3D?5EY|` 6t?8Ea|j ,T 4.1k|p 6t 481q|v -d 4/1w|| -d 4/1}|B 7 491C|H .t 401I|N 8 4:1O|T 0 421U|\ / 411]|b 9$ 4;1c|h / 411i|n 8 4:1o|t / 411u|z 0 421{|B  ? 4A1C |H  9$ 4;1I |N  7 491O |V  1$ 431W |^  ? 3A0_ |d  0 421e |j  ? 4A1k |p  9$ 4;1q |v  0 421w ||  7 491} |D! 1$ 431E!|L! ? 3A0M!|R! .t 0"S!|X! =d 4?1Y!|^! 5d 471_!|f! =d 3?0g!|l! 0 2"m!|p! >t 3@0q!|v! 6t 380w!|~! >t 3@0!|D" >t 3(@DE"|L" +D"(< 3A	-M"|X" +D"(< 3A-Y"|d" '?L!)%e"|l" ,T?L!.%m"|t" &t?L!(%u"||" $T?L!&%}"|D# $T?L!&%E#|L# '?L 3A)?M#|T# ,T?L 3A.?U#|\# &t?L 3A(?]#|d# $T?L 3A&?e#|l# $T?L 3A&?m#|t# '?L 3A)?u#||# &t?L 3A(?}#|D$ $T?L 3A&?E$|L$ $T?L 3A&?M$|V$ $ 41W$|\$   41   41   41 -1"(<s-L 6:"(<s6L .2"(<s.L 7;"(<s7L *.*0Ds*T)-=/DJ^il*n!%"(<s"
 +/"(<s+
 #'"(<s#
 ,0"(<s,o%||% #/"4"4"6n"6$!!%%:LAQ\`abi`jkl`mQmA"6n 	Al1o&A~  1+<=L8%789 
 %\2 jjnn%97CIIKvU  !%)### TN#
 k)*#L T @Q   T @Q   d AR   d AR   d AR   d AR   T @Q   T @Q   T @Q   T @Q   T @Q   D ?P   d AR   d AR   d AR   d AR   d AR   T @Q   d AR    DU   $ EV   4 FW   4 FW   4 FW   D GX    DU    DU    DU   $ EV   D GX   D GX   D GX   T HY   $ EV   $ EV   $ EV   $ EV   $ EV   $ EV   t J[   4 FW   4 FW   $ EV   $ EV   $ EV   
4 
FW 
 
 
d 
IZ 
 
 
d 
IZ 
 
 	D 	O` 	 	 	D 	O` 	 	 	T 	Pa 	 	 	T 	Pa 	 	 	D 	O` 	 	 	D 	O` 	 	 	 	Sd 	 	 $ EV   $ EV    DU   4 FW   	4 	FW 	 	 $ EV   d AR   d AR   t :K   d 9J   t :K   	 	CT 	 	 	 	DU 	 	 	 	CT 	 	  DU    CT    DU    DU    	d 	IZ 	 	 	T 	HY 	 	 	d 	IZ 	 	 d IZ    D GX   D GX   D GX   D GX   D GX   T HY   T HY   T HY   d IZ   d IZ   d IZ   d IZ   d IZ   d IZ    K\    K\    K\    K\    K\    K\    K\    L]    L]    L]   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   D O`   D O`   T HY   d IZ   d IZ   t J[    K\    L]    K\    K\    K\    L]    L]    L]    L]    L]   d IZ    K\    K\    K\    L]    L]    L]   4 FW   
 
CT 
 
 t BS   
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
  4E   $ 5F   $ 5F   	$ 	5F 	 	 
t 
BS 
 
 
 
CT 
 
 
 
CT 
 
 
 
CT 
 
 H ' "E'!#G' "#G' !"E	'
 !"E'  !C' "#E' "#G' !"C'  :'  :' !">'  !<'  !<'  :'  &'H!'" ()L#'$ +O)L*N)' _U  *))*\] os.   Ax* FAx=F)Ax=F9Ax=x*Ax:x9Ax: