
    
3jDf                     l   S SK Jr  S SKrS SKrS SKJr  SSKJrJ	r	  SSK
JrJrJr  SSKJrJrJr  SSKJr  SSKJr  S	S
KJrJr  S	SKJr  S	SKJrJrJrJr  S	SK J!r!  S	SK"J#r#  S	SK$J%r%J&r&J'r'  SSK(J)r)J*r*  \RV                  " \,5      r- " S S\R\                  5      r/ " S S\R\                  5      r0 " S S\R\                  5      r1 " S S\R\                  5      r2 " S S\R\                  5      r3\ " S S\R\                  5      5       r4\ " S S\R\                  5      5       r5 " S  S!\#\\\\\\5	      r6g)"    )AnyN   )ConfigMixinregister_to_config)FluxTransformer2DLoadersMixinFromOriginalModelMixinPeftAdapterMixin)apply_lora_scale	deprecatelogging)is_torch_npu_available)maybe_allow_in_graph   )AttentionMixinFeedForward)
CacheMixin)FluxPosEmbedPixArtAlphaTextProjection	Timestepsget_timestep_embedding)Transformer2DModelOutput)
ModelMixin)CombinedTimestepLabelEmbeddingsFP32LayerNormRMSNorm   )FluxAttentionFluxAttnProcessorc                   `  ^  \ rS rSrSrSS\S\S-  4U 4S jjjr    SS\R                  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\R                  \R                  \R                  \R                  \R                  4   4S jjrSrU =r$ )ChromaAdaLayerNormZeroPruned'   
Norm layer adaptive layer norm zero (adaLN-Zero).

Parameters:
    embedding_dim (`int`): The size of each embedding vector.
    num_embeddings (`int`): The size of the embeddings dictionary.
Nembedding_dimnum_embeddingsc                    > [         TU ]  5         Ub  [        X!5      U l        OS U l        US:X  a  [        R
                  " USSS9U l        g US:X  a  [        USSS9U l        g [        SU S35      e)	N
layer_normFư>elementwise_affineepsfp32_layer_norm)r)   biasUnsupported `norm_type` (@) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)	super__init__r   embnn	LayerNormnormr   
ValueError)selfr#   r$   	norm_typer,   	__class__s        j/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_chroma.pyr0   %ChromaAdaLayerNormZeroPruned.__init__0   sx    %6~UDHDH$]uRVWDI++%mTYZDI+I;6vw     xtimestepclass_labelshidden_dtyper1   returnc                     U R                   b  U R                  X#US9nUR                  SS5      R                  SSS9u  pgppU R                  U5      SUS S 2S 4   -   -  US S 2S 4   -   nXXU4$ )N)r?   r   r      dim)r1   flattenchunkr4   )r6   r<   r=   r>   r?   r1   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlps               r9   forward$ChromaAdaLayerNormZeroPruned.forward@   s     88((8(MCILUVXYIZI`I`abhiI`IjF	h9IIaLA	!T' 223i46HHI(::r;   )r1   r4   )Nr&   T)NNNN)__name__
__module____qualname____firstlineno____doc__intr0   torchTensor
LongTensordtypetuplerM   __static_attributes____classcell__r8   s   @r9   r    r    '   s    c 3:  & )-04+/#';<<; ,,%; &&-	;
 kkD(; \\D ; 
u||U\\5<<u||S	T; ;r;   r    c                      ^  \ rS rSrSrSS\4U 4S jjjr SS\R                  S\R                  S-  S\	\R                  \R                  \R                  \R                  \R                  4   4S	 jjr
S
rU =r$ )"ChromaAdaLayerNormZeroSinglePrunedO   r"   r#   c                    > [         TU ]  5         US:X  a  [        R                  " USSS9U l        g [        SU S35      e)Nr&   Fr'   r(   r-   r.   )r/   r0   r2   r3   r4   r5   )r6   r#   r7   r,   r8   s       r9   r0   +ChromaAdaLayerNormZeroSinglePruned.__init__X   sE    $]uRVWDI+I;6vw r;   Nr<   r1   r@   c                     UR                  SS5      R                  SSS9u  p4nU R                  U5      SUS S 2S 4   -   -  US S 2S 4   -   nX4$ )Nr   r   r   rC   )rE   rF   r4   )r6   r<   r1   rG   rH   rI   s         r9   rM   *ChromaAdaLayerNormZeroSinglePruned.forwardb   s_    
 *-Q):)@)@)@)J&	hIIaLA	!T' 223i46HH{r;   r4   )r&   TN)rO   rP   rQ   rR   rS   rT   r0   rU   rV   rY   rM   rZ   r[   r\   s   @r9   r^   r^   O   su    c   $(<< \\D  
u||U\\5<<u||S	T	 r;   r^   c                      ^  \ rS rSrSr    SS\S\4U 4S jjjrS\R                  S\R                  S\R                  4S	 jr	S
r
U =r$ )"ChromaAdaLayerNormContinuousPrunedl   aq  
Adaptive normalization layer with a norm layer (layer_norm or rms_norm).

Args:
    embedding_dim (`int`): Embedding dimension to use during projection.
    conditioning_embedding_dim (`int`): Dimension of the input condition.
    elementwise_affine (`bool`, defaults to `True`):
        Boolean flag to denote if affine transformation should be applied.
    eps (`float`, defaults to 1e-5): Epsilon factor.
    bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
    norm_type (`str`, defaults to `"layer_norm"`):
        Normalization layer to use. Values supported: "layer_norm", "rms_norm".
r#   conditioning_embedding_dimc                    > [         TU ]  5         US:X  a  [        R                  " XX55      U l        g US:X  a  [        XU5      U l        g [        SU 35      e)Nr&   rms_normzunknown norm_type )r/   r0   r2   r3   r4   r   r5   )r6   r#   ri   r)   r*   r,   r7   r8   s          r9   r0   +ChromaAdaLayerNormContinuousPruned.__init__{   sU     	$]9KRDI*$4FGDI1)=>>r;   r<   r1   r@   c                     [         R                  " UR                  SS5      R                  UR                  5      SSS9u  p4U R                  U5      SU-   S S 2S S S 24   -  US S 2S S S 24   -   nU$ )Nr   r   rC   )rU   rF   rE   torX   r4   )r6   r<   r1   shiftscales        r9   rM   *ChromaAdaLayerNormContinuousPruned.forward   sf    {{3;;q!#4#7#7#@!KIIaLAIq$z22U1dA:5FFr;   rd   )Tgh㈵>Tr&   )rO   rP   rQ   rR   rS   rT   r0   rU   rV   rM   rZ   r[   r\   s   @r9   rg   rg   l   s]    .  ?? %(? ?, ELL U\\  r;   rg   c                   n   ^  \ rS rSrS\S\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	(ChromaCombinedTimestepTextProjEmbeddings   num_channelsout_dimc           
         > [         TU ]  5         [        USSS9U l        [        USSS9U l        U R                  S[        [        R                  " U5      S-  SU-  SSS9SS	9  g )
NTr   )ru   flip_sin_to_cosdownscale_freq_shiftmod_proj  r   )rx   ry   F)
persistent)	r/   r0   r   	time_projguidance_projregister_bufferr   rU   arange)r6   ru   rv   r8   s      r9   r0   1ChromaCombinedTimestepTextProjEmbeddings.__init__   ss    "dijk&LRVmno"W%,a,.>PTkl  	 	
r;   r=   r@   c                    U R                   R                  S   nUR                  S   nU R                  U5      R                  UR                  S9nU R                  [        R                  " S/U-  5      5      R                  UR                  UR                  S9nU R                   R                  UR                  UR                  S9R                  USS5      n[        R                  " XE/SS9R                  S5      R                  SUS5      n[        R                  " Xv/SS9nUR                  UR                  5      $ )Nr   )rX   )rX   devicer   rC   )rz   shaper}   rn   rX   r~   rU   tensorr   repeatcat	unsqueeze)	r6   r=   mod_index_length
batch_sizetimesteps_projr~   rz   timestep_guidance	input_vecs	            r9   rM   0ChromaCombinedTimestepTextProjEmbeddings.forward   s   ==..q1^^A&
1448>>4J**5<<j8H+IJMM.. N 
 ==##.*>*>~G\G\#]ddeoqrtuvII~51=GGJQQRSUeghi 	 II0;D	||HNN++r;   )r~   r}   )rO   rP   rQ   rR   rT   r0   rU   rV   rM   rZ   r[   r\   s   @r9   rs   rs      s6    
S 
3 
, , , ,r;   rs   c            	       F   ^  \ rS rSrS	S\S\S\S\4U 4S jjjrS rSrU =r$ )
ChromaApproximator   in_dimrv   
hidden_dimn_layersc                   > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " [        U5       Vs/ s H  n[        X3SS9PM     sn5      U l        [        R
                  " [        U5       Vs/ s H  n[        R                  " U5      PM     sn5      U l
        [        R                  " X25      U l        g s  snf s  snf )NTr,   silu)act_fn)r/   r0   r2   Linearin_proj
ModuleListranger   layersr   normsout_proj)r6   r   rv   r   r   _r8   s         r9   r0   ChromaApproximator.__init__   s    yy$?mmW\]eWfgWfRS&zfMWfg
 ]]E(O#TOqBJJz$:O#TU
		*6 h#Ts   C Cc                     U R                  U5      n[        U R                  U R                  5       H  u  p#X" U" U5      5      -   nM     U R	                  U5      $ re   )r   zipr   r   r   )r6   r<   layerr   s       r9   rM   ChromaApproximator.forward   sM    LLOTZZ8LEE%(O#A 9 }}Qr;   )r   r   r   r   )   )	rO   rP   rQ   rR   rT   r0   rM   rZ   r[   r\   s   @r9   r   r      s3    7s 7S 7c 7S 7 7   r;   r   c                     ^  \ rS rSr SS\S\S\S\4U 4S jjjr   SS\R                  S	\R                  S
\	\R                  \R                  4   S-  S\R                  S-  S\
\\4   S-  S\R                  4S jjrSrU =r$ )ChromaSingleTransformerBlock   rD   num_attention_headsattention_head_dim	mlp_ratioc                   > [         TU ]  5         [        X-  5      U l        [	        U5      U l        [        R                  " XR                  5      U l        [        R                  " SS9U l
        [        R                  " XR                  -   U5      U l        [        5       (       a  SSKJn  Sn[        SSU5        U" 5       nO
[!        5       n[#        UUUUSUS	SS
9U l        g )Ntanh)approximater   )FluxAttnProcessor2_0_NPUzDefaulting to FluxAttnProcessor2_0_NPU for NPU devices will be removed. Attention processors should be set explicitly using the `set_attn_processor` method.npu_processorz0.34.0Tr'   )	query_dimdim_headheadsrv   r,   	processorr*   pre_only)r/   r0   rT   mlp_hidden_dimr^   r4   r2   r   proj_mlpGELUact_mlpproj_outr   attention_processorr   r   r   r   attn)	r6   rD   r   r   r   r   deprecation_messager   r8   s	           r9   r0   %ChromaSingleTransformerBlock.__init__   s     	!#/26s;			#':':;ww62		#(;(;";SA!##FR   ox1DE02I)+I!'%	
	r;   Nhidden_statestembimage_rotary_embattention_maskjoint_attention_kwargsr@   c                    UnU R                  XS9u  pxU R                  U R                  U5      5      n	U=(       d    0 nUb  US S 2S S S S 24   US S 2S S S 2S 4   -  nU R                  " SUUUS.UD6n
[        R
                  " X/SS9nUR                  S5      nXR                  U5      -  nXa-   nUR                  [        R                  :X  a  UR                  SS5      nU$ )	Nr1   )r   r   r   r   rC   r        )r4   r   r   r   rU   r   r   r   rX   float16clip)r6   r   r   r   r   r   residualnorm_hidden_statesgatemlp_hidden_statesattn_outputs              r9   rM   $ChromaSingleTransformerBlock.forward   s     !#'99]9#E  LL7I)JK!7!=2%+AtT1,<=qRVXY[_O_@``Nii 
,-)
 %	
 		;"BJ~~a }}];; 0%--/)..vu=Mr;   )r   r   r   r4   r   r   )g      @NNN)rO   rP   rQ   rR   rT   floatr0   rU   rV   rY   dictstrr   rM   rZ   r[   r\   s   @r9   r   r      s     #
#
 !#
  	#

 #
 #
R FJ.28<|| ll  ell :;dB	
 t+ !%S#X 5 
 r;   r   c                   H  ^  \ rS rSr  SS\S\S\S\S\4
U 4S jjjr   SS	\R                  S
\R                  S\R                  S\
\R                  \R                  4   S-  S\R                  S-  S\\\4   S-  S\
\R                  \R                  4   4S jjrSrU =r$ )ChromaTransformerBlocki  rD   r   r   qk_normr*   c                 F  > [         TU ]  5         [        U5      U l        [        U5      U l        [        UUUUUSS[        5       US9	U l        [        R                  " USSS9U l
        [        XSS9U l        [        R                  " USSS9U l        [        XSS9U l        g )NFT)	r   added_kv_proj_dimr   r   rv   context_pre_onlyr,   r   r*   r'   r(   zgelu-approximate)rD   dim_outactivation_fn)r/   r0   r    norm1norm1_contextr   r   r   r2   r3   norm2r   ffnorm2_context
ff_context)r6   rD   r   r   r   r*   r8   s         r9   r0   ChromaTransformerBlock.__init__  s     	1#6
9#>!!'%"')

	 \\#%TJ
#BTU\\#%TR%#J\]r;   Nr   encoder_hidden_statesr   r   r   r   r@   c                    US S 2S S24   US S 2SS 24   pU R                  XS9u  ppnU R                  X(S9u  pnnnU=(       d    0 nUb  US S 2S S S S 24   US S 2S S S 2S 4   -  nU R                  " S	U	UUUS.UD6n[        U5      S:X  a  Uu  nnO[        U5      S:X  a  Uu  nnnU
R	                  S5      W-  nUU-   nU R                  U5      n	U	SUS S 2S 4   -   -  US S 2S 4   -   n	U R                  U	5      nUR	                  S5      U-  nUU-   n[        U5      S:X  a  UW-   nUR	                  S5      W-  nUU-   nU R                  U5      nUSUS S 2S 4   -   -  US S 2S 4   -   nU R                  U5      nUUR	                  S5      U-  -   nUR                  [        R                  :X  a  UR                  SS5      nX!4$ )
NrB   r   )r   r   r   r   r   r   r   r   r   r   )r   r   r   lenr   r   r   r   r   rX   rU   r   r   )r6   r   r   r   r   r   r   temb_imgtemb_txtr   rI   rJ   rK   rL   norm_encoder_hidden_states
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpattention_outputsr   context_attn_outputip_attn_output	ff_outputcontext_ff_outputs                            r9   rM   ChromaTransformerBlock.forward3  s\    "!RaR%[$q!"u+(GKzzR_zGnDiHW[WiWi! Xj X
T"[* "8!=2%+AtT1,<=qRVXY[_O_@``N !II 
,"<-)	

 %
  !Q&/@,K,"#q(?P<K,n ((+k9%3!ZZ6/1yD7I3IJYWXZ^W^M__GG./	&&q)I5	%	1 !Q&)N:M )22158KK 58K K%)%7%78M%N"%?1{STVZSZG[C[%\_jklnrkr_s%s" OO,FG 5
8L8LQ8ORc8c c &&%--7$9$>$>vu$M!$33r;   )r   r   r   r   r   r   r   )rk   r'   r   )rO   rP   rQ   rR   rT   r   r   r0   rU   rV   rY   r   r   rM   rZ   r[   r\   s   @r9   r   r     s     "^^ !^  	^
 ^ ^ ^F FJ.28<<4||<4  %||<4 ll	<4
  ell :;dB<4 t+<4 !%S#X 5<4 
u||U\\)	*<4 <4r;   r   c                     ^  \ rS rSrSrSrSS/rSS/rSS/r\	            S#S	\
S
\
S\
S-  S\
S\
S\
S\
S\
S\\
S4   S\
S\
S\
4U 4S jjj5       r\" S5                S$S\R                  S\R                  S\R                   S\R                  S\R                  S\R                  S\\\4   S-  S\S\S \R                  \-  4S! jj5       rS"rU =r$ )%ChromaTransformer2DModelir  a  
The Transformer model introduced in Flux, modified for Chroma.

Reference: https://huggingface.co/lodestones/Chroma1-HD

Args:
    patch_size (`int`, defaults to `1`):
        Patch size to turn the input data into small patches.
    in_channels (`int`, defaults to `64`):
        The number of channels in the input.
    out_channels (`int`, *optional*, defaults to `None`):
        The number of channels in the output. If not specified, it defaults to `in_channels`.
    num_layers (`int`, defaults to `19`):
        The number of layers of dual stream DiT blocks to use.
    num_single_layers (`int`, defaults to `38`):
        The number of layers of single stream DiT blocks to use.
    attention_head_dim (`int`, defaults to `128`):
        The number of dimensions to use for each attention head.
    num_attention_heads (`int`, defaults to `24`):
        The number of attention heads to use.
    joint_attention_dim (`int`, defaults to `4096`):
        The number of dimensions to use for the joint attention (embedding/channel dimension of
        `encoder_hidden_states`).
    axes_dims_rope (`tuple[int]`, defaults to `(16, 56, 56)`):
        The dimensions to use for the rotary positional embeddings.
Tr   r   	pos_embedr4   N
patch_sizein_channelsout_channels
num_layersnum_single_layersr   r   joint_attention_dimaxes_dims_rope.approximator_num_channelsapproximator_hidden_dimapproximator_layersc                 l  > [         TU ]  5         U=(       d    UU l        Xv-  U l        [	        SU	S9U l        [        U
S-  SU-  SU-  -   S-   S9U l        [        U
U R                  UUS9U l	        [        R                  " XR                  5      U l        [        R                  " X R                  5      U l        [        R                  " [        U5       Vs/ s H  n[!        U R                  UUS	9PM     sn5      U l        [        R                  " [        U5       Vs/ s H  n[%        U R                  UUS	9PM     sn5      U l        [)        U R                  U R                  S
SS9U l        [        R                  " U R                  X-  U R                  -  SS9U l        S
U l        g s  snf s  snf )Ni'  )thetaaxes_dim   r      r   )ru   rv   )r   rv   r   r   )rD   r   r   Fr'   r(   Tr   )r/   r0   r   	inner_dimr   r   rs   time_text_embedr   distilled_guidance_layerr2   r   context_embedder
x_embedderr   r   r   transformer_blocksr   single_transformer_blocksrg   norm_outr   gradient_checkpointing)r6   r   r   r   r   r   r   r   r   r   r   r  r  r   r8   s                 r9   r0   !ChromaTransformer2DModel.__init__  s     	(7K,A%ENKG2a7))EJ,>>B 
 );,NN.(	)
% !#		*=~~ N))K@"$-- z* +A '(;'9
 +	#
 *, 01 2A -(;'9
 2	*
& ;NNDNNu$
 		$..*2IDL]L]2]dhi&+#5s   F, F1r   r   r   r=   img_idstxt_idsr   return_dictcontrolnet_blocks_repeatr@   c           
         U R                  U5      nUR                  UR                  5      S-  nU R                  U5      nU R	                  U5      nU R                  U5      nUR                  S:X  a  [        R                  S5        US   nUR                  S:X  a  [        R                  S5        US   n[        R                  " XT4SS9nU R                  U5      nUb;  SU;   a5  UR                  S5      nU R                  U5      nUR                  S	U05        [        U R                   5       GH4  u  nnS[#        U R$                  5      -  nUS
[#        U R                   5      -  -   nUS
U-  -   nUS
U-  -   n[        R                  " USS2UUS
-   24   USS2UUS
-   24   4SS9n[        R&                  " 5       (       a(  U R(                  (       a  U R+                  UXUX5      u  p!OU" UUUUUUS9u  p!Uc  M  [#        U R                   5      [#        U5      -  n[-        [.        R0                  " U5      5      nU(       a  XU[#        U5      -     -   nGM*  XUU-     -   nGM7     [        R                  " X!/SS9n[        U R$                  5       H  u  nnSU-  nUSS2UUS-   24   n[        R&                  " 5       (       a&  U R(                  (       a  U R+                  UUUU5      nO
U" UUUUUS9nU	c  Mh  [#        U R$                  5      [#        U	5      -  n[-        [.        R0                  " U5      5      nUSS2UR2                  S   S2S4   U	UU-     -   USS2UR2                  S   S2S4'   M     USS2UR2                  S   S2S4   nUSS2SS24   nU R5                  UU5      nU R7                  U5      nU
(       d  U4$ [9        US9$ )a$  
The [`FluxTransformer2DModel`] forward method.

Args:
    hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`):
        Input `hidden_states`.
    encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`):
        Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
    timestep ( `torch.LongTensor`):
        Used to indicate denoising step.
    block_controlnet_hidden_states: (`list` of `torch.Tensor`):
        A list of tensors that if specified are added to the residuals of transformer blocks.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain
        tuple.

Returns:
    If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
    `tuple` where the first element is the sample tensor.
r{   r   zrPassing `txt_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch Tensorr   zrPassing `img_ids` 3d torch.Tensor is deprecated.Please remove the batch dimension and pass it as a 2d torch TensorrC   Nip_adapter_image_embedsip_hidden_statesrB   r   )r   r   r   r   r   r   )r   r   r   r   r   .)sample)r  rn   rX   r	  r
  r  ndimloggerwarningrU   r   r   popencoder_hid_projupdate	enumerater  r   r  is_grad_enabledr  _gradient_checkpointing_funcrT   npceilr   r  r   r   )r6   r   r   r=   r  r  r   r   controlnet_block_samplescontrolnet_single_block_samplesr  r  r   pooled_tembidsr   r  r  index_blockblock
img_offset
txt_offsetimg_modulationtext_modulationr   interval_control	start_idxoutputs                               r9   rM    ChromaTransformer2DModel.forward  s    P 6;;}223d:((2	33I> $ 5 56K L<<1NNU ajG<<1NNU ajGii*2>>#.!-2KOe2e&<&@&@AZ&[##445LM"))+=?O*PQ"+D,C,C"DKS!?!?@@J#a#d.E.E*F&FFJ'!k/9N(1{?:O99>NQ4F#F FG?_q5H#H HI D $$&&4+F+F7;7X7X=GW84%}
 8="/*?%5#1+A84% (3#&t'>'>#?#F^B_#_ #&rww/?'@#A +%sSkOlAl(mm " %2[\lMl4m$mMK #EL 		#8"HaP"+D,J,J"KKKIq)i!m";;<D$$&&4+F+F $ A A!$	! !&"/%5#1+A! /:#&t'E'E#FMlIm#m #&rww/?'@#A !!%:%@%@%C%Es"JK5kEU6UVW a!6!<!<Q!?!A3FG1 #L: &a)>)D)DQ)G)I3&NO1bc6"mT:}-9'v66r;   )r  r
  r  r  r  r   r   r   r  r	  r  r  )r   @   N   &         i   )   8   r:  r4  i   r   )
NNNNNNNNTF)rO   rP   rQ   rR   rS    _supports_gradient_checkpointing_no_split_modules_repeated_blocks _skip_layerwise_casting_patternsr   rT   rY   r0   r
   rU   rV   rW   r   r   r   boolr   rM   rZ   r[   r\   s   @r9   r   r   r  s   6 (,$13QR02PQ(3V'<$ #'!#"%#%#'*6)+'+#$>,>, >, Dj	>,
 >, >,  >, !>, !>, c3h>, $'>, "%>, !>, >,@ ./ /3%) $ $'+8<!%(, ).S7||S7  %||S7 ""	S7
 S7 S7 S7 !%S#X 5S7 S7 #'S7 
0	0S7 0S7r;   r   )7typingr   numpyr$  rU   torch.nnr2   configuration_utilsr   r   loadersr   r   r	   utilsr
   r   r   utils.import_utilsr   utils.torch_utilsr   	attentionr   r   cache_utilsr   
embeddingsr   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   r   transformer_fluxr   r   
get_loggerrO   r  Moduler    r^   rg   rs   r   r   r   r   r   r;   r9   <module>rQ     s        B ^ ^ 9 9 8 5 3 $ c c 7 ' S S > 
		H	%%;299 %;P :) )X,ryy ,@   & D299 D DN [4RYY [4 [4|~7!~7r;   