
    
3jU                     z   S SK r S SKJs  Jr  S SK Jr  SSKJrJr  SSKJ	r	  SSK
Jr  SSKJrJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJrJrJr  \	R6                  " \5      r " S S\R<                  5      r " S S\R<                  5      r  " S S5      r!\ " S S\R<                  5      5       r" " S S\\5      r#g)    N)nn   )ConfigMixinregister_to_config)logging)maybe_allow_in_graph   )	AttentionFeedForward)TimestepEmbedding	Timestepsget_3d_rotary_pos_embed)Transformer2DModelOutput)
ModelMixin)AdaLayerNormFP32LayerNormRMSNormc                     ^  \ rS rSr    SS\S\S\S\S\S\SS	4U 4S
 jjjrS\	R                  S\	R                  S\	R                  S\\	R                  \	R                  \	R                  \	R                  4   4S jrSrU =r$ )EasyAnimateLayerNormZero!   conditioning_dimembedding_dimelementwise_affineepsbias	norm_typereturnNc                   > [         TU ]  5         [        R                  " 5       U l        [        R
                  " USU-  US9U l        US:X  a  [        R                  " X#US9U l        g US:X  a  [        X#US9U l        g [        SU S35      e)N   )r   
layer_normr   r   fp32_layer_normzUnsupported `norm_type` (z@) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)super__init__r   SiLUsiluLinearlinear	LayerNormnormr   
ValueError)selfr   r   r   r   r   r   	__class__s          o/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_easyanimate.pyr$   !EasyAnimateLayerNormZero.__init__"   s     	GGI	ii 0!m2C$O$]_bcDI++%m`cdDI+I;6vw     hidden_statesencoder_hidden_statestembc                 N   U R                  U R                  U5      5      R                  SSS9u  pEpgpU R                  U5      SUR	                  S5      -   -  UR	                  S5      -   nU R                  U5      SUR	                  S5      -   -  UR	                  S5      -   nXXi4$ )Nr      dim)r(   r&   chunkr*   	unsqueeze)
r,   r1   r2   r3   shiftscalegate	enc_shift	enc_scaleenc_gates
             r.   forward EasyAnimateLayerNormZero.forward9   s     >B[[SW=Y=_=_`agh=_=i:dy		-0A8J4JKeoo^_N`` $		*? @A	H[H[\]H^D^ _bkbubuc
 !
 TCCr0   )r(   r*   r&   )Th㈵>Tr"   )__name__
__module____qualname____firstlineno__intboolfloatstrr$   torchTensortupler@   __static_attributes____classcell__r-   s   @r.   r   r   !   s    
 $(*  !	
    
 .D"\\DBG,,DV[VbVbD	u||U\\5<<E	FD Dr0   r   c                   ~   ^  \ rS rSrS\S\\   SS4U 4S jjrS rS\R                  S\R                  4S	 jr
S
rU =r$ )EasyAnimateRotaryPosEmbedD   
patch_sizerope_dimr   Nc                 :   > [         TU ]  5         Xl        X l        g N)r#   r$   rT   rU   )r,   rT   rU   r-   s      r.   r$   "EasyAnimateRotaryPosEmbed.__init__E   s    $ r0   c                    UnUnUu  pgXg-  nXU-  :  a  Un	[        [        XV-  U-  5      5      n
OUn
[        [        XG-  U-  5      5      n	[        [        XY-
  S-  5      5      n[        [        XJ-
  S-  5      5      nX4X-   X-   44$ )Ng       @)rG   round)r,   src	tgt_width
tgt_heighttwthhwrresize_heightresize_widthcrop_top	crop_lefts                r.   get_resize_crop_region_for_grid9EasyAnimateRotaryPosEmbed.get_resize_crop_region_for_gridK   s    ER=MuRVaZ01LLbfqj 12Mub0C789r0C789	$x'?AY&ZZZr0   r1   c                    UR                  5       u  p#pEnXPR                  -  nX`R                  -  nSU R                  -  nSU R                  -  nU R                  XV4Xx5      n	[        U R                  U	XV4UR                  S5      SS9n
U
$ )NZ   <   r	   T)	grid_sizetemporal_sizeuse_real)sizerT   rg   r   rU   )r,   r1   bsc
num_framesgrid_height
grid_widthbase_size_widthbase_size_heightgrid_crops_coordsimage_rotary_embs              r.   r@   !EasyAnimateRotaryPosEmbed.forward\   s    5B5G5G5I2z
!__4??2
/0 @@%
 3MM"/',,Q/
  r0   )rT   rU   )rC   rD   rE   rF   rG   listr$   rg   rK   rL   r@   rN   rO   rP   s   @r.   rR   rR   D   sG    !3 !$s) ! ![" U\\  ell    r0   rR   c                       \ rS rSrSrS r  SS\S\R                  S\R                  S\R                  S-  S	\R                  S-  S
\R                  4S jjr	Sr
g)EasyAnimateAttnProcessor2_0p   z
Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
used in the EasyAnimateTransformer3DModel model.
c                 D    [        [        S5      (       d  [        S5      eg )Nscaled_dot_product_attentionzaEasyAnimateAttnProcessor2_0 requires PyTorch 2.0 or above. To use it, please install PyTorch 2.0.)hasattrFImportError)r,   s    r.   r$   $EasyAnimateAttnProcessor2_0.__init__v   s%    q899s  :r0   Nattnr1   r2   attention_maskrx   r   c           	      0   UR                   c  Ub  [        R                  " X2/SS9nUR                  U5      nUR	                  U5      nUR                  U5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  b  UR                  U5      nUR                  b  UR                  U5      nUR                   Gb?  UGb;  UR                  U5      n	UR                  U5      n
UR                  U5      nU	R                  SUR                  S45      R                  SS5      n	U
R                  SUR                  S45      R                  SS5      n
UR                  SUR                  S45      R                  SS5      nUR                  b  UR                  U	5      n	UR                  b  UR                  U
5      n
[        R                  " X/SS9n[        R                  " X/SS9n[        R                  " X/SS9nUb  SSKJn  U" US S 2S S 2UR"                  S   S 24   U5      US S 2S S 2UR"                  S   S 24'   UR$                  (       d;  U" US S 2S S 2UR"                  S   S 24   U5      US S 2S S 2UR"                  S   S 24'   [&        R(                  " XgXSSS9nUR                  SS5      R+                  SS	5      nUR-                  UR.                  5      nUb  US S 2S UR"                  S   24   US S 2UR"                  S   S 24   p#[1        US
S 5      b*  UR2                  S   " U5      nUR2                  S   " U5      n[1        USS 5      b  UR5                  U5      nX#4$ [1        US
S 5      b*  UR2                  S   " U5      nUR2                  S   " U5      nX#4$ )Nr5   r6   r	   )apply_rotary_emb        F)	attn_mask	dropout_p	is_causalr   to_outr   
to_add_out)
add_q_projrK   catto_qto_kto_v	unflattenheads	transposenorm_qnorm_k
add_k_proj
add_v_projnorm_added_qnorm_added_k
embeddingsr   shapeis_cross_attentionr   r   flattentodtypegetattrr   r   )r,   r   r1   r2   r   rx   querykeyvalueencoder_queryencoder_keyencoder_valuer   s                r.   __call__$EasyAnimateAttnProcessor2_0.__call__|   s    ??"'<'H!II'<&LRSTM 		-(ii&		-(DJJ#34>>q!DmmA

B/0::1a@DJJ#34>>q!D ;;"KK&E;;"++c"C ??&+@+L OO,ABM//*?@K OO,ABM)33A

B7GHRRSTVWXM%//DJJ3CDNNqRSTK)33A

B7GHRRSTVWXM  , $ 1 1- @  ,"//<II}4!<E))[.A6CII}4!<E'5<La177:<<=?O=E!Q-33A6889 **>N1399!<>>?AQ?Aq/55a8::;
 663RW
 &//15==aC%((5 !,a!A#8#>#>q#A!AABa!6!<!<Q!?!AAB $1
 tXt,8 $A} = $A} =t\40<(,8M(N% 33	 tXt,8 $A} = $A} =33r0    )NN)rC   rD   rE   rF   __doc__r$   r
   rK   rL   r   rN   r   r0   r.   r|   r|   p   s~    
 /304Q4Q4 ||Q4  %||	Q4
 t+Q4  ,,-Q4 
Q4 Q4r0   r|   c                   R  ^  \ rS rSr           SS\S\S\S\S\S\S	\S
\S\S\S-  S\S\S\S\S\4U 4S jjjr SS\	R                  S\	R                  S\	R                  S\\	R                  \	R                  4   S-  S\\	R                  \	R                  4   4
S jjrSrU =r$ )EasyAnimateTransformerBlock   Nr7   num_attention_headsattention_head_dimtime_embed_dimdropoutactivation_fnnorm_elementwise_affinenorm_epsfinal_dropoutff_inner_dimff_biasqk_norm
after_normr   is_mmdit_blockc                 ~  > [         TU ]  5         [        XAXxUSS9U l        [	        UUUU(       a  SOS SSSU(       a  UOS U(       a  SOS [        5       S9
U l        [        XAXxUSS9U l        [        UUUU	U
US9U l	        S U l
        U(       a  [        UUUU	U
US9U l
        S U l        U(       a  [        XUS9U l        g g )	NT)r   r   r    ư>F)
	query_dimdim_headr   r   r   r   added_proj_biasadded_kv_proj_dimcontext_pre_only	processor)r   r   r   	inner_dimr   r!   )r#   r$   r   norm1r
   r|   attn1norm2r   fftxt_ffnorm3r   )r,   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r-   s                   r.   r$   $EasyAnimateTransformerBlock.__init__   s    $ 	 .!8i^b

 '%$+L %3c&4U$13

 .!8i^b

 ''"
 %++&DK 
&s\deDJ r0   r1   r2   r3   rx   r   c                    U R                  XU5      u  pVpxU R                  UUUS9u  pXR                  S5      U	-  -   nX(R                  S5      U
-  -   nU R                  XU5      u  pVpU R                  bo  U R	                  U R                  U5      5      nU R                  b!  U R	                  U R                  U5      5      nObU R	                  U R                  U5      5      nOAU R                  U5      nU R                  b  U R                  U5      nOU R                  U5      nXR                  S5      U-  -   nX,R                  S5      U-  -   nX4$ )N)r1   r2   rx   r5   )r   r   r9   r   r   r   r   )r,   r1   r2   r3   rx   norm_hidden_statesnorm_encoder_hidden_statesgate_msaenc_gate_msaattn_hidden_statesattn_encoder_hidden_statesgate_ffenc_gate_ffs                r.   r@   #EasyAnimateTransformerBlock.forward  so    RVQ[Q[$R
N :>,"<- :D :
6
 &(:(:1(=@R(RR 58N8Nq8QTn8n n PTzz$P
L ::!!%DGG4F,G!H{{&-1ZZD^8_-`*-1ZZ@Z8[-\*!%);!<{{&-1[[9S-T*-1WW5O-P*%(9(9!(<?Q(QQ 58M8Ma8PSm8m m33r0   )r   r   r   r   r   r   )r   gelu-approximateTr   TNTTFr"   TrW   )rC   rD   rE   rF   rG   rI   rJ   rH   r$   rK   rL   rM   r@   rN   rO   rP   s   @r.   r   r      sa    /(,"#' *#!@f@f !@f  	@f
 @f @f @f "&@f @f @f Dj@f @f @f @f @f  !@f @fN FJ%4||%4  %||%4 ll	%4
  ell :;dB%4 
u||U\\)	*%4 %4r0   r   c            2         ^  \ rS rSrSrSrS/r/ SQr\                         S+S\	S\	S	\	S-  S
\	S-  S\	S-  S\	S\	S\
S\
S\	S\	S\	S\S\	S\S\	S\	S\S\S\S\
S\S\S\40U 4S jjj5       r      S,S \R                  S!\R                  S"\R                  S-  S#\R                  S-  S$\R                  S-  S%\R                  S-  S&\R                  S-  S'\S(\\R                     \-  4S) jjrS*rU =r$ )-EasyAnimateTransformer3DModeli<  a`	  
A Transformer model for video-like data in [EasyAnimate](https://github.com/aigc-apps/EasyAnimate).

Parameters:
    num_attention_heads (`int`, defaults to `48`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `64`):
        The number of channels in each head.
    in_channels (`int`, defaults to `16`):
        The number of channels in the input.
    out_channels (`int`, *optional*, defaults to `16`):
        The number of channels in the output.
    patch_size (`int`, defaults to `2`):
        The size of the patches to use in the patch embedding layer.
    sample_width (`int`, defaults to `90`):
        The width of the input latents.
    sample_height (`int`, defaults to `60`):
        The height of the input latents.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    timestep_activation_fn (`str`, defaults to `"silu"`):
        Activation function to use when generating the timestep embeddings.
    num_layers (`int`, defaults to `30`):
        The number of layers of Transformer blocks to use.
    mmdit_layers (`int`, defaults to `1000`):
        The number of layers of Multi Modal Transformer blocks to use.
    dropout (`float`, defaults to `0.0`):
        The dropout probability to use.
    time_embed_dim (`int`, defaults to `512`):
        Output dimension of timestep embeddings.
    text_embed_dim (`int`, defaults to `4096`):
        Input dimension of text embeddings from the text encoder.
    norm_eps (`float`, defaults to `1e-5`):
        The epsilon value to use in normalization layers.
    norm_elementwise_affine (`bool`, defaults to `True`):
        Whether to use elementwise affine in normalization layers.
    flip_sin_to_cos (`bool`, defaults to `True`):
        Whether to flip the sin to cos in the time embedding.
    time_position_encoding_type (`str`, defaults to `3d_rope`):
        Type of time position encoding.
    after_norm (`bool`, defaults to `False`):
        Flag to apply normalization after.
    resize_inpaint_mask_directly (`bool`, defaults to `True`):
        Flag to resize inpaint mask directly.
    enable_text_attention_mask (`bool`, defaults to `True`):
        Flag to enable text attention mask.
    add_noise_in_inpaint_model (`bool`, defaults to `False`):
        Flag to add noise in inpaint model.
Tr   )z^proj$r*   z
^proj_out$Nr   r   in_channelsout_channelsrT   sample_widthsample_heightr   timestep_activation_fn
freq_shift
num_layersmmdit_layersr   r   add_norm_text_encodertext_embed_dimtext_embed_dim_t5r   r   flip_sin_to_costime_position_encoding_typeresize_inpaint_mask_directlyenable_text_attention_maskadd_noise_in_inpaint_modelc                   > [         TU ]  5         X-  n[        UUU
5      U l        [	        UX5      U l        [        XR5      U l        [        R                  " UUXU4USS9U l
        S U l        S U l        U(       d<  [        R                  " UU5      U l        Ub  [        R                  " UU5      U l        Ow[        R                  " [        USSS9[        R                  " UU5      5      U l        Ub:  [        R                  " [        USSS9[        R                  " UU5      5      U l        [        R                   " [#        U5       Vs/ s H  n[%        UUUUUUUUUUU:  a  SOSS9
PM     sn5      U l        [        R(                  " UUU5      U l        [-        USU-  UUSS	9U l        [        R                  " UXU-  U-  5      U l        SU l        g s  snf )
NT)kernel_sizestrider   r   )r   F)
r7   r   r   r   r   r   r   r   r   r   r	   r5   )r   
output_dimr   r   	chunk_dim)r#   r$   r   	time_projr   time_embeddingrR   rope_embeddingr   Conv2dproj	text_projtext_proj_t5r'   
Sequentialr   
ModuleListranger   transformer_blocksr)   
norm_finalr   norm_outproj_outgradient_checkpointing)r,   r   r   r   r   rT   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _r-   s                               r.   r$   &EasyAnimateTransformer3DModel.__init__s  s   : 	'<	 #9ozJ/	>b7
W II0HQ[bf
	
  $YY~yADN ,$&II.?$K!]]F		R`bkHlDN !,$&MMNDTJBIIVgirLs%!
 #%-- z* +A ,!(;'9#1#"/,C%)+,|+;4 +#
" ,,y(<ST %(9}$;
 		)Z-D|-ST&+#9s   
$Gr1   timesteptimestep_condr2   encoder_hidden_states_t5inpaint_latentscontrol_latentsreturn_dictr   c	           	      
   UR                  5       u  ppnU R                  R                  nX-  nX-  nU R                  U5      R	                  UR
                  S9nU R                  UU5      nU R                  U5      nUb  [        R                  " X/S5      nUb  [        R                  " X/S5      nUR                  SSSSS5      R                  SS5      nU R                  U5      nUR                  SU	S45      R                  SSSSS5      nUR                  SS5      R                  SS5      nU R                  U5      nUb5  U R!                  U5      n[        R"                  " XE/SS9R%                  5       nU R&                   HR  n[        R(                  " 5       (       a)  U R*                  (       a  U R-                  UXUU5      u  pMF  U" XUU5      u  pMT     U R/                  U5      nU R1                  UUS	9nU R3                  U5      nU R                  R                  nUR5                  XUUXU5      nUR                  SSSSS
SS5      R                  S
S5      R                  SS5      nU(       d  U4$ [7        US9$ )N)r   r5   r   r	   r      r   r6   )r3      r   )sample)ro   configrT   r   r   r   r   r   rK   concatpermuter   r   r   r   r   r   r   
contiguousr   is_grad_enabledr   _gradient_checkpointing_funcr   r   r   reshaper   )r,   r1   r  r  r2   r  r  r  r  
batch_sizechannelsvideo_lengthheightwidthppost_patch_heightpost_patch_widthr3   rx   blockoutputs                        r.   r@   %EasyAnimateTransformer3DModel.forward  s    =J<N<N<P9
lEKK"""K : ~~h'**1D1D*E""47..}= &!LL-)I1MM&!LL-)I1MM%--aAq!<DDQJ		-0%//J3CDLLq!Q
 &--a3==aC !%/D E#/'+'8'89Q'R$$)II/D._ef$g$r$r$t! ,,E$$&&4+F+F7;7X7X=GW844 8=!$@P844 - 6 m$?m4 KK""&&zARTdfnstu1aAq!4<<QBJJ1aP9'v66r0   )r   r   r   r   r   r   r   r   r   r   r   )0   @   NNNrj   rk   r   r&   r   r  r  r   i   Fi   NrB   TT3d_ropeFTTT)NNNNNT)rC   rD   rE   rF   r    _supports_gradient_checkpointing_no_split_modules _skip_layerwise_casting_patternsr   rG   rJ   rI   rH   r$   rK   rL   rM   r   r@   rN   rO   rP   s   @r.   r   r   <  sY   0d (,$67'G$ $&"$"&#'!%/&,!&+"!%(, $+4-1+/+/5W, W,  W, 4Z	W,
 DjW, $JW, W, W, W, !$W, W, W, W, W, W,   $!W," #W,$ %W,& 'W,( "&)W,* +W,, &)-W,0 '+1W,2 %)3W,4 %)5W, W,z .2598</3/3 @7||@7 ,,@7 ||d*	@7
  %||d2@7 #(,,"5@7 ,@7 ,@7 @7 
u||	7	7@7 @7r0   r   )$rK   torch.nn.functionalr   
functionalr   configuration_utilsr   r   utilsr   utils.torch_utilsr   	attentionr
   r   r   r   r   r   modeling_outputsr   modeling_utilsr   normalizationr   r   r   
get_loggerrC   loggerModuler   rR   r|   r   r   r   r0   r.   <module>r/     s         B  5 . N N 7 ' @ @ 
		H	% Dryy  DF) 		 ) X]4 ]4@ h4")) h4 h4VQ7J Q7r0   