
    
3jT              	          S SK Jr  S SKrS SKJs  Jr  S SKJr  SSKJrJ	r	  SSK
JrJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJrJrJrJr  SSKJr  SSKJr  SSKJrJ r   \RB                  " \"5      r# " S S\RH                  5      r% " S S\RH                  5      r& " S S\RH                  5      r' " S S5      r( " S S\RH                  5      r) " S S\\\\\5      r*g)    )AnyN)nn   )ConfigMixinregister_to_config)FromOriginalModelMixinPeftAdapterMixin)apply_lora_scalelogging   )AttentionMixin)	AttentionSanaLinearAttnProcessor2_0)
PatchEmbedPixArtAlphaTextProjectionTimestepEmbedding	Timesteps)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSingleRMSNormc                      ^  \ rS rSr   SS\S\S\S\S-  S\SS4U 4S	 jjjrS
\	R                  S\	R                  4S jrSrU =r$ )	GLUMBConv&   Nin_channelsout_channelsexpand_ratio	norm_typeresidual_connectionreturnc           	        > [         TU ]  5         [        X1-  5      nX@l        XPl        [
        R                  " 5       U l        [
        R                  " XS-  SSS5      U l	        [
        R                  " US-  US-  SSSUS-  S9U l
        [
        R                  " XbSSSSS9U l        S U l        US:X  a  [        US	S
S
S9U l        g g )Nr      r   r   )groupsFbiasrms_normh㈵>T)epselementwise_affiner%   )super__init__intr   r   r   SiLUnonlinearityConv2dconv_inverted
conv_depth
conv_pointnormr   )selfr   r   r   r   r   hidden_channels	__class__s          h/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/sana_transformer.pyr+   GLUMBConv.__init__'   s     	l89"#6 GGIYY{a4GAqQ))Oa$719LaQRTU^mpq^qr))O1aQVW	
"$4VZ[DI #    hidden_statesc                    U R                   (       a  UnU R                  U5      nU R                  U5      nU R                  U5      n[        R
                  " USSS9u  pXR                  U5      -  nU R                  U5      nU R                  S:X  a1  U R                  UR                  SS5      5      R                  SS5      nU R                   (       a  UW-   nU$ )Nr   r"   dimr&   )
r   r0   r.   r1   torchchunkr2   r   r3   movedim)r4   r:   residualgates       r7   forwardGLUMBConv.forward>   s    ##$H**=9))-86#kk-B%(9(9$(??6>>Z' IIm&;&;Ar&BCKKBPQRM##)H4Mr9   )r1   r0   r2   r.   r3   r   r   )   NT)__name__
__module____qualname____firstlineno__r,   floatstrboolr+   r?   TensorrD   __static_attributes____classcell__r6   s   @r7   r   r   &   s~    
   $$(\\ \ 	\
 :\ "\ 
\ \.U\\ ell  r9   r   c                      ^  \ rS rSrSS\S\S\4U 4S jjjrS\R                  S\R                  S\R                  S	\R                  4S
 jr
SrU =r$ )SanaModulatedNormU   r=   r)   r(   c                 V   > [         TU ]  5         [        R                  " XUS9U l        g )Nr)   r(   )r*   r+   r   	LayerNormr3   )r4   r=   r)   r(   r6   s       r7   r+   SanaModulatedNorm.__init__V   s!    LLQTU	r9   r:   tembscale_shift_tabler    c                     U R                  U5      nUS    US S 2S 4   R                  UR                  5      -   R                  SSS9u  pEUSU-   -  U-   nU$ )Nr   r"   r<   )r3   todevicer@   )r4   r:   rY   rZ   shiftscales         r7   rD   SanaModulatedNorm.forwardZ   sh     		-0)$/$q$w-2B2BCTC[C[2\\ccdeklcm%U3e;r9   )r3   )Fư>)rG   rH   rI   rJ   r,   rM   rK   r+   r?   rN   rD   rO   rP   rQ   s   @r7   rS   rS   U   sc    VC VT V V V"\\16RWR^R^	 r9   rS   c                   ~   ^  \ rS rSrU 4S jrSS\R                  S\R                  S\R                  4S jjrSr	U =r
$ )	&SanaCombinedTimestepGuidanceEmbeddingsc   c                   > [         TU ]  5         [        SSSS9U l        [	        SUS9U l        [        SSSS9U l        [	        SUS9U l        [        R                  " 5       U l
        [        R                  " USU-  SS9U l        g )N   Tr   )num_channelsflip_sin_to_cosdownscale_freq_shift)r   time_embed_dim   r$   )r*   r+   r   	time_projr   timestep_embedderguidance_condition_projguidance_embedderr   r-   siluLinearlinear)r4   embedding_dimr6   s     r7   r+   /SanaCombinedTimestepGuidanceEmbeddings.__init__d   sx    "T`ab!2sS`!a'0cSWno'p$!2sS`!aGGI	iiq=/@tLr9   timestepguidancehidden_dtypec                 
   U R                  U5      nU R                  UR                  US95      nU R                  U5      nU R	                  UR                  US95      nXW-   nU R                  U R                  U5      5      U4$ )N)dtype)rl   rm   r\   rn   ro   rr   rp   )	r4   ru   rv   rw   timesteps_projtimesteps_embguidance_projguidance_embconditionings	            r7   rD   .SanaCombinedTimestepGuidanceEmbeddings.forwardo   s    1..~/@/@|/@/TU44X>--m.>.>\.>.RS$3{{499\23\AAr9   )rn   ro   rr   rp   rl   rm   NN)rG   rH   rI   rJ   r+   r?   rN   ry   rD   rO   rP   rQ   s   @r7   rc   rc   c   s<    	MB B B[`[f[f B Br9   rc   c                       \ rS rSrSrS r  SS\S\R                  S\R                  S-  S\R                  S-  S	\R                  4
S
 jjr	Sr
g)SanaAttnProcessor2_0z   zk
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
c                 D    [        [        S5      (       d  [        S5      eg )Nscaled_dot_product_attentionzTSanaAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.)hasattrFImportError)r4   s    r7   r+   SanaAttnProcessor2_0.__init__   s!    q899tuu :r9   Nattnr:   encoder_hidden_statesattention_maskr    c           	      *   Uc  UR                   OUR                   u  pVnUb<  UR                  XFU5      nUR                  XQR                  SUR                   S   5      nUR	                  U5      nUc  UnUR                  U5      n	UR                  U5      n
UR                  b  UR                  U5      nUR                  b  UR                  U	5      n	U	R                   S   nXR                  -  nUR                  USUR                  U5      R                  SS5      nU	R                  USUR                  U5      R                  SS5      n	U
R                  USUR                  U5      R                  SS5      n
[        R                  " XXSSS9nUR                  SS5      R                  USUR                  U-  5      nUR                  UR                  5      nUR                  S   " U5      nUR                  S   " U5      nX!R                   -  nU$ )Nr>   r"   r           F)	attn_mask	dropout_p	is_causalr   )shapeprepare_attention_maskviewheadsto_qto_kto_vnorm_qnorm_k	transposer   r   reshaper\   ry   to_outrescale_output_factor)r4   r   r:   r   r   
batch_sizesequence_length_querykeyvalue	inner_dimhead_dims                r7   __call__SanaAttnProcessor2_0.__call__   s    $9#@MF[FaFa 	'
Q %!88ZdeN ,00ZZ^MaMabdMefN		-( ($1!ii-.		/0;;"KK&E;;"++c"CIIbM	

*

:r4::x@JJ1aPhhz2tzz8<FFq!L

:r4::x@JJ1aP 663RW
 &//15==j"djj[cNcd%((5 A}5A}5%(B(BBr9    r   )rG   rH   rI   rJ   __doc__r+   r   r?   rN   r   rO   r   r9   r7   r   r   z   si    v 6:.266 ||6  %||d2	6
 t+6 
6 6r9   r   c                   h  ^  \ rS rSrSr             SS\S\S\S\S\S-  S	\S-  S
\S-  S\S\S\S\S\S\S-  SS4U 4S jjjr	      SS\
R                  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S\S\
R                  4S jjrSrU =r$ )SanaTransformerBlock   zS
Transformer block introduced in [Sana](https://huggingface.co/papers/2410.10629).
Nr=   num_attention_headsattention_head_dimdropoutnum_cross_attention_headscross_attention_head_dimcross_attention_dimattention_biasnorm_elementwise_affinenorm_epsattention_out_bias	mlp_ratioqk_normr    c                   > [         TU ]  5         [        R                  " USU
S9U l        [        UUUUb  UOS UUUS [        5       S9	U l        Ub>  [        R                  " XU
S9U l        [        UUUb  UOS UUUUSU[        5       S9
U l
        [        XUS SS9U l        [        R                  " [        R                  " SU5      US-  -  5      U l        g )	NFrV   )		query_dimr   dim_headkv_headsr   r   r%   r   	processorT)
r   r   r   r   r   r   r   r%   out_biasr   )r   r   rk         ?)r*   r+   r   rW   norm1r   r   attn1norm2r   attn2r   ff	Parameterr?   randnrZ   )r4   r=   r   r   r   r   r   r   r   r   r   r   r   r   r6   s                 r7   r+   SanaTransformerBlock.__init__   s      	 \\#%XN
%',3,?(T $02


 *c[cdDJ"6=6I2t$7/1+.0DJ Ci4UZ[!#ekk!S.ACH.L!Mr9   r:   r   r   encoder_attention_maskru   heightwidthc                 j   UR                   S   nU R                  S    UR                  USS5      -   R                  SSS9u  pppU R	                  U5      nUSU
-   -  U	-   nUR                  UR                  5      nU R                  U5      nXU-  -   nU R                  b  U R                  UUUS9nUU-   nU R                  U5      nUSU-   -  U-   nUR                  SXg45      R                  SSSS5      nU R                  U5      nUR                  SS5      R                  SSS5      nXU-  -   nU$ )	Nr   rk   r>   r"   r<   )r   r   r   r   )r   rZ   r   r@   r   r\   ry   r   r   r   	unflattenpermuter   flatten)r4   r:   r   r   r   ru   r   r   r   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlpnorm_hidden_statesattn_output	ff_outputs                     r7   rD   SanaTransformerBlock.forward   so    #((+
 ""4(8+;+;J2+NN
%q%/ 	G	h9
 "ZZ6/1y=AIM/22=3F3FGjj!34%;(>> ::!**&;5 % K
 (-7M "ZZ6/1y=AIM/99!f_MUUVWYZ\]_`aGG./	%%a+33Aq!<	%9(<<r9   )r   r   r   r   r   rZ   )  F       r      p   r   TFra   T      @N)NNNNNN)rG   rH   rI   rJ   r   r,   rK   rM   rL   r+   r?   rN   
LongTensorrD   rO   rP   rQ   s   @r7   r   r      s    #%"$02/2*.#(-#'"3N3N !3N  	3N
 3N $':3N #&*3N !4Z3N 3N "&3N 3N !3N 3N t3N 
3N 3Np /3596:,0+||+ t++  %||d2	+
 !&t 3+ ""T)+ + + 
+ +r9   r   c            .       .  ^  \ rS rSrSrSr/ SQrSS/r\                     S+S\	S	\	S-  S
\	S\	S\	S\	S-  S\	S-  S\	S-  S\	S\
S\
S\S\	S\	S\S\
S\	S-  S\S\
S\S-  S\
SS4,U 4S jjj5       r\" S5            S,S \R                   S!\R                   S"\R                   S#\R                   S-  S$\R                   S-  S%\R                   S-  S\\\4   S-  S&\\R                      S-  S'\S\\R                   S(4   \-  4S) jj5       rS*rU =r$ )-SanaTransformer2DModeli$  a  
A 2D Transformer model introduced in [Sana](https://huggingface.co/papers/2410.10629) family of models.

Args:
    in_channels (`int`, defaults to `32`):
        The number of channels in the input.
    out_channels (`int`, *optional*, defaults to `32`):
        The number of channels in the output.
    num_attention_heads (`int`, defaults to `70`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `32`):
        The number of channels in each head.
    num_layers (`int`, defaults to `20`):
        The number of layers of Transformer blocks to use.
    num_cross_attention_heads (`int`, *optional*, defaults to `20`):
        The number of heads to use for cross-attention.
    cross_attention_head_dim (`int`, *optional*, defaults to `112`):
        The number of channels in each head for cross-attention.
    cross_attention_dim (`int`, *optional*, defaults to `2240`):
        The number of channels in the cross-attention output.
    caption_channels (`int`, defaults to `2304`):
        The number of channels in the caption embeddings.
    mlp_ratio (`float`, defaults to `2.5`):
        The expansion ratio to use in the GLUMBConv layer.
    dropout (`float`, defaults to `0.0`):
        The dropout probability.
    attention_bias (`bool`, defaults to `False`):
        Whether to use bias in the attention layer.
    sample_size (`int`, defaults to `32`):
        The base size of the input latent.
    patch_size (`int`, defaults to `1`):
        The size of the patches to use in the patch embedding layer.
    norm_elementwise_affine (`bool`, defaults to `False`):
        Whether to use elementwise affinity in the normalization layer.
    norm_eps (`float`, defaults to `1e-6`):
        The epsilon value for the normalization layer.
    qk_norm (`str`, *optional*, defaults to `None`):
        The normalization to use for the query and key.
    timestep_scale (`float`, defaults to `1.0`):
        The scale to use for the timesteps.
T)r   r   rS   patch_embedr3   Nr   r   r   r   
num_layersr   r   r   caption_channelsr   r   r   sample_size
patch_sizer   r   interpolation_scaleguidance_embedsguidance_embeds_scaler   timestep_scaler    c                 t  > [         TU ]  5         U=(       d    UnX4-  n[        UUUUUUUb  SOS S9U l        U(       a  [	        U5      U l        O[        U5      U l        [        U	US9U l        [        USSS9U l
        [        R                  " [        U5       Vs/ s H  n[        UUUUUUUUUUU
US9PM     sn5      U l        [        R                   " ["        R$                  " SU5      US	-  -  5      U l        [)        US
SS9U l        [        R,                  " UX-  U-  5      U l        S
U l        g s  snf )Nsincos)r   r   r   r   	embed_dimr   pos_embed_type)in_featureshidden_sizer'   T)r(   r)   )	r   r   r   r   r   r   r   r   r   r   r   Fra   rV   )r*   r+   r   r   rc   
time_embedr   r   caption_projectionr   caption_normr   
ModuleListranger   transformer_blocksr   r?   r   rZ   rS   norm_outrq   proj_outgradient_checkpointing)r4   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6   s                           r7   r+   SanaTransformer2DModel.__init__S  sN   2 	#2{'<	 &!# 3':'F8D
 DYODO0;DO";HXfo"p#I4DQ #%--  z* +A %'&#.G-E(;#1,C%'# +#
* "$ekk!Y.G)UX..X!Y))SWX		)Z-D|-ST&+#1s   D5attention_kwargsr:   r   ru   rv   r   r   controlnet_block_samplesreturn_dict.c
                    UbB  UR                   S:X  a2  SUR                  UR                  5      -
  S-  nUR                  S5      nUbB  UR                   S:X  a2  SUR                  UR                  5      -
  S-  nUR                  S5      nUR                  u  ppU R
                  R                  nX-  X-  nnU R                  U5      nUb  U R                  X4UR                  S9u  nnOU R                  X:UR                  S9u  nnU R                  U5      nUR                  U
SUR                  S   5      nU R                  U5      n[        R                  " 5       (       at  U R                  (       ac  [        U R                   5       HI  u  nnU R#                  UUUUUUUU5      nUc  M#  SUs=:  a  [%        U5      ::  d  M;  O  M?  XUS-
     -   nMK     OX[        U R                   5       H?  u  nnU" UUUUUUU5      nUc  M  SUs=:  a  [%        U5      ::  d  M1  O  M5  XUS-
     -   nMA     U R'                  UUU R(                  5      nU R+                  U5      nUR-                  XUU R
                  R                  U R
                  R                  S5      nUR/                  SSSS	SS
5      nUR-                  U
SX-  UU-  5      nU	(       d  U4$ [1        US9$ )Nr   r"   g     )rv   rw   )r   rw   r>   r      r   rF   )sample)ndimr\   ry   	unsqueezer   configr   r   r   r   r   r   r?   is_grad_enabledr   	enumerater   _gradient_checkpointing_funclenr   rZ   r   r   r   r   )r4   r:   r   ru   rv   r   r   r   r   r   r   rg   r   r   ppost_patch_heightpost_patch_widthembedded_timestepindex_blockblockoutputs                        r7   rD   SanaTransformer2DModel.forward  s   . %.*=*=*B
  ."3"3M4G4G"HHHTN+55a8N "-2H2M2MQR2R&'*@*C*CMDWDW*X&X\d%d"%;%E%Ea%H" 3@2E2E/
&KK"".4k5:+((7*.//-:M:M +: +'H' +///m>Q>Q +: +'H' !% 7 78M N 5 : ::r=K^K^_aKb c $ 1 12G H   ""t'B'B&/0G0G&H"U $ A A!")*%$	! ,7A<lsSkOl<l<l$1[[\_4]$]M 'I '00G0G&H"U %!")*%$! ,7A<lsSkOl<l<l$1[[\_4]$]M 'I m5FH^H^_m4 &--+;T[[=S=SUYU`U`UkUkmo
 &--aAq!Q?&&z27H7LN^abNbc9'v66r9   )	r   r   r   r   r   r   rZ   r   r   )r   r   r   r   r   r   r   r   i 	  r   r   Fr   r"   Fra   NFg?Ng      ?)NNNNNT)rG   rH   rI   rJ   r    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr   r,   rK   rM   rL   r+   r
   r?   rN   dictr   tupler   rD   rO   rP   rQ   s   @r7   r   r   $  sr   (T (,$S(5v'>$ #%#%"$02/2*. $$(-*. %'*" #-K,K, DjK, !	K,
  K, K, $':K, #&*K, !4ZK, K, K, K, K, K, K,  "&!K," #K,$ !4Z%K,& 'K,(  %)K,* t+K,, -K,. 
/K, K,Z () )-6:.226?C e7||e7  %||e7 ,,	e7
 ,,%e7 !&t 3e7 t+e7 sCx.4/e7 #("5"<e7 e7 
u||S 	!$<	<e7 *e7r9   r   )+typingr   r?   torch.nn.functionalr   
functionalr   configuration_utilsr   r   loadersr   r	   utilsr
   r   	attentionr   attention_processorr   r   
embeddingsr   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   
get_loggerrG   loggerModuler   rS   rc   r   r   r   r   r9   r7   <module>r%     s         B ? . & ] \ 7 ' 7 
		H	%,		 ,^		 BRYY B.? ?De299 ePc7ZFVXn c7r9   