
    
3jS                        S SK r S SKJr  S SKrS SKJr  S SKJs  Jr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJrJr  SSKJr  SSKJr  SSK J!r!J"r"J#r#  \RH                  " \%5      r& " S S\RN                  5      r( " S S5      r) " S S\RN                  5      r* " S S\RN                  5      r+ " S S\\
\\5      r,g)    N)Any   )ConfigMixinregister_to_config)PeftAdapterMixin)FromOriginalModelMixin)apply_lora_scalelogging   )LuminaFeedForward)	Attention)TimestepEmbedding	Timestepsapply_rotary_embget_1d_rotary_pos_embed)Transformer2DModelOutput)
ModelMixin)LuminaLayerNormContinuousLuminaRMSNormZeroRMSNormc                      ^  \ rS rSr    SS\S\S\S\SS4
U 4S jjjrS	\R                  S
\R                  S\R                  S\	\R                  \R                  4   4S jr
SrU =r$ )'Lumina2CombinedTimestepCaptionEmbedding%   hidden_sizecap_feat_dimfrequency_embedding_sizenorm_epsreturnNc           	         > [         TU ]  5         [        USSS9U l        [	        U[        US5      S9U l        [        R                  " [        X$S9[        R                  " X!SS95      U l        g )NTg        )num_channelsflip_sin_to_cosdownscale_freq_shift   )in_channelstime_embed_dimeps)bias)super__init__r   	time_projr   mintimestep_embeddernn
Sequentialr   Linearcaption_embedder)selfr   r   r   r   	__class__s        k/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_lumina2.pyr*   0Lumina2CombinedTimestepCaptionEmbedding.__init__&   si     	"14^a
 "30[RVAW"
 !#L/<[_1`!
    hidden_statestimestepencoder_hidden_statesc                     U R                  U5      R                  U5      nU R                  U5      nU R                  U5      nXV4$ N)r+   type_asr-   r1   )r2   r7   r8   r9   timestep_proj
time_embedcaption_embeds          r4   forward/Lumina2CombinedTimestepCaptionEmbedding.forward;   sG     x088G++M:
--.CD((r6   )r1   r+   r-   )i   i      h㈵>)__name__
__module____qualname____firstlineno__intfloatr*   torchTensortupler@   __static_attributes____classcell__r3   s   @r4   r   r   %   s       (+

 
 #&	

 
 

 
*)"\\)5:\\)Z_ZfZf)	u||U\\)	*) )r6   r   c                       \ rS rSrSrS r   SS\S\R                  S\R                  S\R                  S-  S	\R                  S-  S
\	S-  S\R                  4S jjr
Srg)Lumina2AttnProcessor2_0D   z
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
used in the Lumina2Transformer2DModel model. It applies normalization and RoPE on query and key vectors.
c                 D    [        [        S5      (       d  [        S5      eg )Nscaled_dot_product_attentionzPAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.)hasattrFImportError)r2   s    r4   r*    Lumina2AttnProcessor2_0.__init__J   s!    q899pqq :r6   Nattnr7   r9   attention_maskimage_rotary_embbase_sequence_lengthr   c                    UR                   u  pxn	UR                  U5      n
UR                  U5      nUR                  U5      nU
R                   S   nUR                   S   nXR                  -  nU
R
                  nX-  nU
R                  USUR                  U5      n
UR                  USUU5      nUR                  USUU5      nUR                  b  UR                  U
5      n
UR                  b  UR                  U5      nUb  [        XSS9n
[        XSS9nU
R                  U5      UR                  U5      pUb8  [        R                  " [        R                  " X5      5      UR                  -  nOUR                  nUR                  U-  nUS:  ah  UR                  S5      R!                  SSSUS5      R#                  SS5      nUR                  S5      R!                  SSSUS5      R#                  SS5      nUb"  UR%                  5       R                  USSS5      nU
R'                  SS5      n
UR'                  SS5      nUR'                  SS5      n[(        R*                  " XXUS9nUR'                  SS5      R-                  USUR                  U-  5      nUR/                  U
5      nUR0                  S   " U5      nUR0                  S   " U5      nU$ )	NF)use_real   r   r   )	attn_maskscaler   )shapeto_qto_kto_vheadsdtypeviewnorm_qnorm_kr   tomathsqrtlogrb   	unsqueezerepeatflattenbool	transposerV   rT   reshaper<   to_out)r2   rY   r7   r9   rZ   r[   r\   
batch_sizesequence_length_querykeyvalue	query_dim	inner_dimhead_dimrh   kv_headssoftmax_scalen_reps                       r4   __call__ Lumina2AttnProcessor2_0.__call__N   s    *7)<)<&
Q 		-(ii-.		/0KKO	IIbM	

* (

:r4::x@hhz2x:

:r8X> ;;"KK&E;;"++c"C '$UuME"35ICXXe_cffUms  + IIdhh&UVY]YcYccM JJM 

h&A:--"))!Q5!<DDQJCOOA&--aAua@HHANE %+00277
Aq"MN1%mmAq!1%66}
 &//15==j"djj[cNcd%--e4 A}5A}5r6    )NNN)rD   rE   rF   rG   __doc__r*   r   rJ   rK   rH   r   rM   r   r6   r4   rQ   rQ   D   s    
r /304+/GG ||G  %||	G
 t+G  ,,-G "DjG 
G Gr6   rQ   c                      ^  \ rS rSr SS\S\S\S\S\S\S\S	S
4U 4S jjjr SS\R                  S\R                  S\R                  S\R                  S
-  S	\R                  4
S jjr
SrU =r$ )Lumina2TransformerBlock   dimnum_attention_headsnum_kv_headsmultiple_offfn_dim_multiplierr   
modulationr   Nc                 J  > [         TU ]  5         X-  U l        Xpl        [	        US X-  SUUSSS[        5       S9
U l        [        USU-  UUS9U l        U(       a  [        UUSS9U l
        O[        XS	9U l
        [        XS	9U l        [        XS	9U l        [        XS	9U l        g )
Nrms_normrC   F)
r}   cross_attention_dimdim_headqk_normrg   r   r'   r(   out_bias	processor   )r   r~   r   r   T)embedding_dimr   norm_elementwise_affiner&   )r)   r*   r   r   r   rQ   rY   r   feed_forwardr   norm1r   	ffn_norm1norm2	ffn_norm2)	r2   r   r   r   r   r   r   r   r3   s	           r4   r*    Lumina2TransformerBlock.__init__   s     	2$ $/%!-/
	 .#g#1	
 *!!(,DJ !3DJ 3S/
 3r6   r7   rZ   r[   tembc                    U R                   (       a  U R                  X5      u  pVpxU R                  UUUUS9n	XR                  S5      R	                  5       U R                  U	5      -  -   nU R                  U R                  U5      SUR                  S5      -   -  5      n
XR                  S5      R	                  5       U R                  U
5      -  -   nU$ U R                  U5      nU R                  UUUUS9n	XR                  U	5      -   nU R                  U R                  U5      5      n
XR                  U
5      -   nU$ )N)r7   r9   rZ   r[   r`   )	r   r   rY   rp   tanhr   r   r   r   )r2   r7   rZ   r[   r   norm_hidden_statesgate_msa	scale_mlpgate_mlpattn_output
mlp_outputs              r4   r@   Lumina2TransformerBlock.forward   sA    ??@D

=@_=)))0&8-!1	 $ K *,>,>q,A,F,F,H4::VaKb,bbM**4>>-+HAPYPcPcdePfLf+ghJ),>,>q,A,F,F,H4>>ZdKe,eeM  "&M!:))0&8-!1	 $ K *JJ{,CCM**4>>-+HIJ)NN:,FFMr6   )rY   r   r   r   r   r   r   r   )Tr;   )rD   rE   rF   rG   rH   rI   rs   r*   rJ   rK   r@   rM   rN   rO   s   @r4   r   r      s      -4-4 !-4 	-4
 -4 "-4 -4 -4 
-4 -4h %)||   ,,	
 llT! 
 r6   r   c            	          ^  \ rS rSrSS\S\\   S\\   S\4U 4S jjjrS\\   S\\   S\S\\R                     4S jr	S	\R                  S\R                  4S
 jr
S\R                  S\R                  4S jrSrU =r$ )Lumina2RotaryPosEmbed   thetaaxes_dim	axes_lens
patch_sizec                    > [         TU ]  5         Xl        X l        X0l        X@l        U R                  X#U5      U l        g r;   )r)   r*   r   r   r   r   _precompute_freqs_cis	freqs_cis)r2   r   r   r   r   r3   s        r4   r*   Lumina2RotaryPosEmbed.__init__   s6    
 "$33HOr6   r   c                 0   / n[         R                  R                  R                  5       (       a  [         R                  O[         R
                  n[        [        X5      5       H.  u  nu  px[        XxU R                  US9n	UR                  U	5        M0     U$ )N)r   freqs_dtype)rJ   backendsmpsis_availablefloat32float64	enumeratezipr   r   append)
r2   r   r   r   r   r   ideembs
             r4   r   +Lumina2RotaryPosEmbed._precompute_freqs_cis   sp    	',~~'9'9'F'F'H'Hemmemm"3x#;<IAv)!djjkZCS! = r6   idsc           
         UR                   nUR                   R                  S:X  a  UR                  S5      n/ n[        [	        U R
                  5      5       H  nU R                  U   R                  UR                   5      nUS S 2S S 2XDS-   24   R                  SSUR                  S   5      R                  [        R                  5      nUR                  [        R                  " UR                  S5      R                  UR                  S   SS5      SUS95        M     [        R                  " USS9R                  U5      $ )Nr   cpur`   r^   r   )r   indexr   )devicetyperl   rangelenr   r   rq   rc   rJ   int64r   gatherrp   cat)r2   r   r   resultr   freqsr   s          r4   _get_freqs_cis$Lumina2RotaryPosEmbed._get_freqs_cis   s    ::??e#&&-Cs4==)*ANN1%((4E1aa%i(//1ekk"oFII%++VEMM%,,uq'9'@'@QQRTU'V\]ejkl + yyR(++F33r6   r7   rZ   c                 f   UR                   u  p4pVU R                  nXW-  Xg-  pX-  n
UR                  nUR                   S   nUR                  SS9R	                  5       nU Vs/ s H  oU
-   PM	     nn[        U5      n[        R                  " UUS[        R                  US9n[        [        X5      5       H  u  nu  nn[        R                  " U[        R                  US9UUS U2S4'   UUUUU2S4'   [        R                  " U[        R                  US9R                  SS5      R                  SU	5      R                  5       n[        R                  " U	[        R                  US9R                  SS5      R                  US5      R                  5       nUUUUU2S4'   UUUUU2S4'   M     U R                  U5      n[        R                  " X<UR                   S   UUR                   S9n[        R                  " X:UR                   S   UUR                   S9n[        [        X5      5       H)  u  nu  nnUUS U24   UUS U24'   UUUU24   UUS U
24'   M+     UR                  X4XX5      R#                  SSS	SS
S5      R                  S5      R                  SS5      nUUUUX4$ s  snf )Nr`   r   r   )rh   r   r   r^   r   )r   rh   r      )rc   r   r   sumtolistmaxrJ   zerosint32r   r   arangeri   rq   rr   r   rh   permute)r2   r7   rZ   rw   channelsheightwidthppost_patch_heightpost_patch_widthimage_seq_lenr   encoder_seq_lenl_effective_cap_lencap_seq_lenseq_lengthsmax_seq_lenposition_idsr   seq_lenrow_idscol_idsr   cap_freqs_cisimg_freqs_ciss                            r4   r@   Lumina2RotaryPosEmbed.forward  s   .;.A.A+
fOO.4k5:+)<%%(..q1,00Q07>>@FYZFY{]2FYZ+& {{:{AU[[Y_`)237J3X)Y%A%W/4||Ku{{ci/jLL[L!+,6ALK/23 .ekk&Qb!+,	  -U[[Pa)1-	  7>LK/236=LK/23' *Z, ''5	 )<VS\SbSb
 yr':6QZQ`Q`
 *337J3X)Y%A%W-6q,;,-GM!\k\/*/8K<O9O/PM!^m^+, *Z z5FK[_WQ1aA&WQZWQ]	 	 m]IGZgge [s   #J.)r   r   r   r   r   )i,     r   r   )rD   rE   rF   rG   rH   listr*   rJ   rK   r   r   r@   rM   rN   rO   s   @r4   r   r      s    Pc PT#Y P49 Plo P Pd3i DI VY ^bchcoco^p 
4%,, 
45<< 
4;hU\\ ;h5<< ;h ;hr6   r   c            $         ^  \ rS rSrSrSrS/rSS/r\                S"S\	S	\	S
\	S\	S-  S\	S\	S\	S\	S\	S\	S\
S-  S\
S\
S\\	\	\	4   S\\	\	\	4   S\	SS4"U 4S jjj5       r\" S5        S#S\R                  S\R                  S\R                  S\R                  S\\\4   S-  S\S\R                  \-  4S  jj5       rS!rU =r$ )$Lumina2Transformer2DModeliE  a<  
Lumina2NextDiT: Diffusion model with a Transformer backbone.

Parameters:
    sample_size (`int`): The width of the latent images. This is fixed during training since
        it is used to learn a number of position embeddings.
    patch_size (`int`, *optional*, (`int`, *optional*, defaults to 2):
        The size of each patch in the image. This parameter defines the resolution of patches fed into the model.
    in_channels (`int`, *optional*, defaults to 4):
        The number of input channels for the model. Typically, this matches the number of channels in the input
        images.
    hidden_size (`int`, *optional*, defaults to 4096):
        The dimensionality of the hidden layers in the model. This parameter determines the width of the model's
        hidden representations.
    num_layers (`int`, *optional*, default to 32):
        The number of layers in the model. This defines the depth of the neural network.
    num_attention_heads (`int`, *optional*, defaults to 32):
        The number of attention heads in each attention layer. This parameter specifies how many separate attention
        mechanisms are used.
    num_kv_heads (`int`, *optional*, defaults to 8):
        The number of key-value heads in the attention mechanism, if different from the number of attention heads.
        If None, it defaults to num_attention_heads.
    multiple_of (`int`, *optional*, defaults to 256):
        A factor that the hidden size should be a multiple of. This can help optimize certain hardware
        configurations.
    ffn_dim_multiplier (`float`, *optional*):
        A multiplier for the dimensionality of the feed-forward network. If None, it uses a default value based on
        the model configuration.
    norm_eps (`float`, *optional*, defaults to 1e-5):
        A small value added to the denominator for numerical stability in normalization layers.
    scaling_factor (`float`, *optional*, defaults to 1.0):
        A scaling factor applied to certain parameters or layers in the model. This can be used for adjusting the
        overall scale of the model's operations.
Tr   
x_embeddernormNsample_sizer   r$   out_channelsr   
num_layersnum_refiner_layersr   r   r   r   r   scaling_factoraxes_dim_roper   r   r   c                   > [         TU ]  5         U=(       d    UU l        [        SXUS9U l        [
        R                  " X"-  U-  US9U l        [        UUUS9U l	        [
        R                  " [        U5       Vs/ s H  n[        UUU	U
UUSS9PM     sn5      U l        [
        R                  " [        U5       Vs/ s H  n[        UUU	U
UUSS9PM     sn5      U l        [
        R                  " [        U5       Vs/ s H  n[        UUU	U
UUSS9PM     sn5      U l        [!        U[#        US5      SS	SX"-  U R                  -  S
9U l        SU l        g s  snf s  snf s  snf )Ni'  )r   r   r   r   )in_featuresout_features)r   r   r   T)r   Fr#   gư>)r   conditioning_embedding_dimelementwise_affiner'   r(   out_dim)r)   r*   r   r   rope_embedderr.   r0   r   r   time_caption_embed
ModuleListr   r   noise_refinercontext_refinerlayersr   r,   norm_outgradient_checkpointing)r2   r   r   r$   r   r   r   r   r   r   r   r   r   r   r   r   r   ry   r3   s                     r4   r*   "Lumina2Transformer2DModel.__init__m  s   ( 	(7K 3-Q[
 ))
0G+0Udop"I#,#

  ]] 12 3A (' &# 3
  "}} 12 3A (' &$ 3 
  mm z* +A (' &# +
  2%'*;'=$+d.?.??
 ',#o s   <E>E E#attention_kwargsr7   r8   r9   encoder_attention_maskreturn_dictc           
         UR                   u  pxpU R                  XU5      u  pU R                  X5      u  nnnnnnU R                  U5      nU R                   H  nU" X4U5      nM     U R
                   H  nU" US X5      nM     [        U5      n[        [        U5      5      S:  nUR                  UU[        R                  S9nUR                  UUU R                  R                  5      n[        [        UU5      5       H.  u  nu  nnSUUS U24'   UUS U24   UUS U24'   UU   UUUU24'   M0     UnU R                    H`  n[        R"                  " 5       (       a0  U R$                  (       a  U R'                  UUU(       a  UOS X5      nMM  U" UU(       a  UOS X5      nMb     U R)                  X5      nU R                  R*                  n/ n[        [        UU5      5       Hv  u  nu  nnUR-                  UU   UU R/                  U	U-  U
U-  UUU R0                  5      R3                  SSSSS5      R5                  SS5      R5                  SS5      5        Mx     [        R6                  " USS9nU(       d  U4$ [9        US	9$ )
Nr`   )rh   Tr   r   r   r   r   )sample)rc   r  r  r   r  r  r   r   set	new_zerosrJ   rs   configr   r   r   r  is_grad_enabledr
  _gradient_checkpointing_funcr	  r   r   ri   r   r   rr   stackr   )r2   r7   r8   r9   r  r  r  rw   ry   r   r   r   context_rotary_embnoise_rotary_emb
rotary_embencoder_seq_lengthsr   layerr   use_maskrZ   joint_hidden_statesr   r   r   r   outputs                              r4   r@   !Lumina2Transformer2DModel.forward  s    (5':':$
v&*&=&=mWl&m# }E	
 6 ))E$)*?Yk$l! * ''E!-7GNM ( +&s;'(1,&00[PUPZPZ0[+55j+t{{OfOfg-6s;NP[7\-])A)*.N1hwh;'7LQP`Q`P`M`7a#3O#3 34>KA>N?7#: :; .^
 ,[[E$$&&4+F+F $ A A=H.$PZ! !&mx^UY[e l ! m: KK""-6s;NP[7\-])A)MMa 9fk5A:q!T5F5FGAq!Q'AA .^ V+9'v66r6   )	r  r
  r  r  r	  r   r  r  r   )   r      Ni 	     r         rB   NrC   g      ?)    r%  r%  r   r#   )NT)rD   rE   rF   rG   r    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr   rH   rI   rL   r*   r	   rJ   rK   dictstrr   rs   r   r@   rM   rN   rO   s   @r4   r   r   E  s   !F (,$23(4f'=$ #'"##%+/ #.:*9 #Z,Z, Z, 	Z,
 DjZ, Z, Z,  Z, !Z, Z, Z, "DLZ, Z, Z, S#s]+Z,  c3'!Z," #Z,$ 
%Z, Z,x () 37 G7||G7 ,,G7  %||	G7
 !&G7 sCx.4/G7 G7 
0	0G7 *G7r6   r   )-rm   typingr   rJ   torch.nnr.   torch.nn.functional
functionalrV   configuration_utilsr   r   loadersr   loaders.single_file_modelr   utilsr	   r
   	attentionr   attention_processorr   
embeddingsr   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   r   
get_loggerrD   loggerModuler   rQ   r   r   r   r   r6   r4   <module>r<     s          B ' ? . ) + ` ` 7 ' Q Q 
		H	%)bii )>Q QhNbii NbYhBII YhxM7
K9IKa M7r6   