
    
3j                       S SK JrJr  S SKrS SKJr  S SKJs  Jr  SSK	J
r
Jr  SSKJrJrJr  SSKJr  SSKJrJrJrJrJrJr  SS	KJrJrJr  SS
KJr  SSKJ r J!r!J"r"J#r#J$r$  \" 5       (       a  S SK%r&OSr&\RN                  " \(5      r) " S S5      r* " S S5      r+S\RX                  S\RZ                  S\.S\.4S jr/\ " S S\RX                  5      5       r0\ " S S\RX                  5      5       r1\ " S S\RX                  5      5       r2 " S S\RX                  5      r3\ " S S\RX                  5      5       r4 " S S \RX                  5      r5\ " S! S"\RX                  5      5       r6 " S# S$\RX                  5      r7g)%    )AnyCallableN   )	deprecatelogging)is_torch_npu_availableis_torch_xla_availableis_xformers_available)maybe_allow_in_graph   )GEGLUGELUApproximateGELUFP32SiLULinearActivationSwiGLU)	AttentionAttentionProcessorJointAttnProcessor2_0)SinusoidalPositionalEmbedding)AdaLayerNormAdaLayerNormContinuousAdaLayerNormZeroRMSNormSD35AdaLayerNormZeroXc                   `    \ rS rSr\S\\\4   4S j5       rS\\\\4   -  4S jr	S r
S rSrg	)
AttentionMixin'   returnc                    ^ 0 nS[         S[        R                  R                  S[        [         [
        4   4U4S jjmU R                  5        H  u  p#T" X#U5        M     U$ )z
Returns:
    `dict` of attention processors: A dictionary containing all attention processors used in the model with
    indexed by its weight name.
namemodule
processorsc                    > [        US5      (       a  UR                  5       X  S3'   UR                  5        H  u  p4T" U  SU 3XB5        M     U$ )Nget_processor
.processor.)hasattrr%   named_children)r!   r"   r#   sub_namechildfn_recursive_add_processorss        T/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/attention.pyr,   CAttentionMixin.attn_processors.<locals>.fn_recursive_add_processors2   sZ    v//282F2F2H
V:./#)#8#8#:+tfAhZ,@%T $;     )strtorchnnModuledictr   r)   )selfr#   r!   r"   r,   s       @r-   attn_processorsAttentionMixin.attn_processors(   sb     
	c 	588?? 	X\]`bt]tXu 	 !//1LD'jA 2 r/   	processorc           	      d  ^ [        U R                  R                  5       5      n[        U[        5      (       a-  [        U5      U:w  a  [        S[        U5       SU SU S35      eS[        S[        R                  R                  4U4S jjmU R                  5        H  u  p4T" X4U5        M     g)	a  
Sets the attention processor to use to compute attention.

Parameters:
    processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`):
        The instantiated processor class or a dictionary of processor classes that will be set as the processor
        for **all** `Attention` layers.

        If `processor` is a dict, the key needs to define the path to the corresponding cross attention
        processor. This is strongly recommended when setting trainable attention processors.

z>A dict of processors was passed, but the number of processors z0 does not match the number of attention layers: z. Please make sure to pass z processor classes.r!   r"   c                 
  > [        US5      (       aJ  [        U[        5      (       d  UR                  U5        O#UR                  UR	                  U  S35      5        UR                  5        H  u  p4T" U  SU 3XB5        M     g )Nset_processorr&   r'   )r(   
isinstancer4   r;   popr)   )r!   r"   r8   r*   r+   fn_recursive_attn_processors        r-   r>   FAttentionMixin.set_attn_processor.<locals>.fn_recursive_attn_processorU   ss    v//!)T22((3(($z7J)KL#)#8#8#:+tfAhZ,@%S $;r/   N)lenr6   keysr<   r4   
ValueErrorr0   r1   r2   r3   r)   )r5   r8   countr!   r"   r>   s        @r-   set_attn_processor!AttentionMixin.set_attn_processor@   s     D((--/0i&&3y>U+BPQTU^Q_P` a005w6QRWQXXkm 
	Tc 	T588?? 	T !//1LD'i@ 2r/   c                 H   U R                   R                  5        H3  u  pS[        UR                  R                  5      ;   d  M*  [        S5      e   U R                  5        H=  n[        U[        5      (       d  M  UR                  (       d  M-  UR                  5         M?     g)z
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
are fused. For cross-attention modules, key and value projection matrices are fused.
AddedzQ`fuse_qkv_projections()` is not supported for models having added KV projections.N)r6   itemsr0   	__class____name__rB   modulesr<   AttentionModuleMixin_supports_qkv_fusionfuse_projections)r5   _attn_processorr"   s       r-   fuse_qkv_projections#AttentionMixin.fuse_qkv_projectionsb   s|    
 "&!5!5!;!;!=A#n66??@@ !tuu "> llnF&"677F<W<W<W'') %r/   c                     U R                  5        H=  n[        U[        5      (       d  M  UR                  (       d  M-  UR	                  5         M?     g)u]   Disables the fused QKV projection if enabled.

> [!WARNING] > This API is 🧪 experimental.
N)rK   r<   rL   rM   unfuse_projections)r5   r"   s     r-   unfuse_qkv_projections%AttentionMixin.unfuse_qkv_projectionso   s9    
 llnF&"677F<W<W<W))+ %r/    N)rJ   
__module____qualname____firstlineno__propertyr4   r0   r   r6   rD   rQ   rU   __static_attributes__rW   r/   r-   r   r   '   sP    c+=&=!>  . A,>cK]F]A^,^  AD*,r/   r   c                      \ rS rSrSr/ rSrSrS\SS4S jr	S(S\
SS	4S
 jjrS\4S jrS\
SS4S jr  S)S\
S\\S-  S4   S-  SS4S jjr S*S\
S\S-  SS4S jjr\R(                  " 5       S 5       r\R(                  " 5       S 5       rS\SS4S jrS\R2                  S\R2                  4S jrS+S\R2                  S\S\R2                  4S jjr S*S\R2                  S\R2                  S \R2                  S-  S\R2                  4S! jjr S+S \R2                  S"\S#\S\S\R2                  4
S$ jjrS%\R2                  S\R2                  4S& jrS'rg),rL   y   NTFr8   r   c                 j   [        U S5      (       a  [        U R                  [        R                  R
                  5      (       ai  [        U[        R                  R
                  5      (       d@  [        R                  SU R                   SU 35        U R                  R                  S5        Xl        g)zu
Set the attention processor to use.

Args:
    processor (`AttnProcessor`):
        The attention processor to use.
r8   z-You are removing possibly trained weights of z with N)
r(   r<   r8   r1   r2   r3   loggerinfo_modulesr=   )r5   r8   s     r-   r;   "AttentionModuleMixin.set_processor   sx     D+&&4>>588??;;y%((//::KKGGWW]^g]hijMMk*"r/   return_deprecated_lorar   c                 *    U(       d  U R                   $ g)z
Get the attention processor in use.

Args:
    return_deprecated_lora (`bool`, *optional*, defaults to `False`):
        Set to `True` to return the deprecated LoRA attention processor.

Returns:
    "AttentionProcessor": The attention processor in use.
N)r8   )r5   rd   s     r-   r%   "AttentionModuleMixin.get_processor   s     &>>! &r/   backendc                    SSK Jn  UR                  R                  5        Vs1 s H  o3R                  iM     nnX;  a"  [        SU< S3SR                  U5      -   5      eU" UR                  5       5      nXR                  l	        g s  snf )Nr   )AttentionBackendNamez	`backend=z ` must be one of the following: z, )
attention_dispatchri   __members__valuesvaluerB   joinlowerr8   _attention_backend)r5   rg   ri   xavailable_backendss        r-   set_attention_backend*AttentionModuleMixin.set_attention_backend   sw    </C/O/O/V/V/XY/X!gg/XY,z
*JKdiiXjNkkll&w}}7,3) Zs   Buse_npu_flash_attentionc                 h    U(       a  [        5       (       d  [        S5      eU R                  S5        g)z
Set whether to use NPU flash attention from `torch_npu` or not.

Args:
    use_npu_flash_attention (`bool`): Whether to use NPU flash attention or not.
ztorch_npu is not available_native_npuN)r   ImportErrorrs   )r5   ru   s     r-   set_use_npu_flash_attention0AttentionModuleMixin.set_use_npu_flash_attention   s*     #)++!">??""=1r/   use_xla_flash_attentionpartition_spec.c                 h    U(       a  [        5       (       d  [        S5      eU R                  S5        g)a  
Set whether to use XLA flash attention from `torch_xla` or not.

Args:
    use_xla_flash_attention (`bool`):
        Whether to use pallas flash attention kernel from `torch_xla` or not.
    partition_spec (`tuple[]`, *optional*):
        Specify the partition specification if using SPMD. Otherwise None.
    is_flux (`bool`, *optional*, defaults to `False`):
        Whether the model is a Flux model.
ztorch_xla is not available_native_xlaN)r	   rx   rs   )r5   r{   r|   is_fluxs       r-   set_use_xla_flash_attention0AttentionModuleMixin.set_use_xla_flash_attention   s*    " #)++!">??""=1r/   'use_memory_efficient_attention_xformersattention_opc                    U(       a  [        5       (       d
  [        SSS9e[        R                  R	                  5       (       d  [        S5      e [        5       (       aL  SnUb  Uu  pEUR                  tp6[        R                  " SSUS9n[        R                  R                  XwU5      nU R                  S5        gg! [         a  nUeSnAff = f)	ax  
Set whether to use memory efficient attention from `xformers` or not.

Args:
    use_memory_efficient_attention_xformers (`bool`):
        Whether to use memory efficient attention from `xformers` or not.
    attention_op (`Callable`, *optional*):
        The attention operation to use. Defaults to `None` which uses the default attention operation from
        `xformers`.
zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)r!   zvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU N)r   r   (   cudadevicedtype)r
   ModuleNotFoundErrorr1   r   is_availablerB   SUPPORTED_DTYPESrandnxopsopsmemory_efficient_attention	Exceptionrs   )	r5   r   r   r   op_fwop_bwrO   qes	            r-   +set_use_memory_efficient_attention_xformers@AttentionModuleMixin.set_use_memory_efficient_attention_xformers   s     3(**){#  ZZ,,.. / 

,.. $'3+7LE(-(>(>IE!KK
6O HH??aH **:61 3* ! Gs   AB> >
CC

Cc                 
   U R                   (       d-  [        R                  U R                  R                   S35        g[        U SS5      (       a  gU R                  R                  R                  R                  nU R                  R                  R                  R                  n[        U S5      (       Gak  U R                  (       GaY  [        R                  " U R                  R                  R                  U R                   R                  R                  /5      nUR"                  S   nUR"                  S   n[$        R&                  " XEU R(                  XS9U l        U R*                  R                  R-                  U5        [        U S	5      (       a  U R(                  (       ay  [        R                  " U R                  R.                  R                  U R                   R.                  R                  /5      nU R*                  R.                  R-                  U5        GO[        R                  " U R                  R                  R                  U R                  R                  R                  U R                   R                  R                  /5      nUR"                  S   nUR"                  S   n[$        R&                  " XEU R(                  XS9U l        U R0                  R                  R-                  U5        [        U S	5      (       a  U R(                  (       a  [        R                  " U R                  R.                  R                  U R                  R.                  R                  U R                   R.                  R                  /5      nU R0                  R.                  R-                  U5        [        U S
S5      Gb  [        U SS5      Gb  [        U SS5      Gb  [        R                  " U R2                  R                  R                  U R4                  R                  R                  U R6                  R                  R                  /5      nUR"                  S   nUR"                  S   n[$        R&                  " XEU R8                  XS9U l        U R:                  R                  R-                  U5        U R8                  (       a  [        R                  " U R2                  R.                  R                  U R4                  R.                  R                  U R6                  R.                  R                  /5      nU R:                  R.                  R-                  U5        SU l        g)zU
Fuse the query, key, and value projections into a single projection for efficiency.
zK does not support fusing QKV projections, so `fuse_projections` will no-op.Nfused_projectionsFis_cross_attentionr   r   )biasr   r   use_bias
add_q_proj
add_k_proj
add_v_projT)rM   r`   debugrI   rJ   getattrto_qweightdatar   r   r(   r   r1   catto_kto_vshaper2   Linearr   to_kvcopy_r   to_qkvr   r   r   added_proj_biasto_added_qkvr   )r5   r   r   concatenated_weightsin_featuresout_featuresconcatenated_biass          r-   rN   %AttentionModuleMixin.fuse_projections   s    ((LL>>**++vw  4,e44!!&&--		  %%++4-..43J3J3J#(99dii.>.>.C.CTYYEUEUEZEZ-[#\ .44Q7K/55a8L;4==Y_mDJJJ##$89tZ((T]]$)IItyy~~/B/BDIINNDWDW.X$Y!

%%&78 $)99dii.>.>.C.CTYYEUEUEZEZ\`\e\e\l\l\q\q-r#s .44Q7K/55a8L))KDMMZ`nDKKK$$%9:tZ((T]]$)IItyy~~/B/BDIINNDWDWY]YbYbYgYgYlYl.m$n!  &&'89 D,-9lD1=lD1=#(99'',,doo.D.D.I.I4??KaKaKfKfg$  /44Q7K/55a8L "		0D0DV!D $$**+?@##$)II__))..0D0D0I0I4??K_K_KdKde%! !!&&,,->?!%r/   c                 
   U R                   (       d  g[        U SS5      (       d  g[        U S5      (       a  [        U S5        [        U S5      (       a  [        U S5        [        U S5      (       a  [        U S5        SU l        g)zL
Unfuse the query, key, and value projections back to separate projections.
Nr   Fr   r   r   )rM   r   r(   delattrr   )r5   s    r-   rT   'AttentionModuleMixin.unfuse_projections:  st     (( t0%88 4""D(#4!!D'"4((D.)!&r/   
slice_sizec                     [        U S5      (       a.  Ub+  XR                  :  a  [        SU SU R                   S35      eSnUb  U R                  S5      nUc  U R	                  5       nU R                  U5        g)z
Set the slice size for attention computation.

Args:
    slice_size (`int`):
        The slice size for attention computation.
sliceable_head_dimNzslice_size z has to be smaller or equal to r'   sliced)r(   r   rB   _get_compatible_processordefault_processor_clsr;   )r5   r   r8   s      r-   set_attention_slice(AttentionModuleMixin.set_attention_sliceT  s     4-..:3Ij[r[rNr{:,6UVZVmVmUnnopqq	 !66x@I 224I9%r/   tensorc                     U R                   nUR                  u  p4nUR                  X2-  X$U5      nUR                  SSSS5      R                  X2-  XEU-  5      nU$ )z
Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`.

Args:
    tensor (`torch.Tensor`): The tensor to reshape.

Returns:
    `torch.Tensor`: The reshaped tensor.
r   r   r      )headsr   reshapepermute)r5   r   	head_size
batch_sizeseq_lendims         r-   batch_to_head_dim&AttentionModuleMixin.batch_to_head_dimk  s_     JJ	#)<< 
S
 7SQ1a+33J4KW\eVefr/   out_dimc                    U R                   nUR                  S:X  a  UR                  u  pEnSnOUR                  u  pGpVUR                  XEU-  X6U-  5      nUR	                  SSSS5      nUS:X  a  UR                  XC-  XW-  Xc-  5      nU$ )z
Reshape the tensor for multi-head attention processing.

Args:
    tensor (`torch.Tensor`): The tensor to reshape.
    out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor.

Returns:
    `torch.Tensor`: The reshaped tensor.
r   r   r   r   )r   ndimr   r   r   )r5   r   r   r   r   r   r   	extra_dims           r-   head_to_batch_dim&AttentionModuleMixin.head_to_batch_dim{  s     JJ	;;!'-||$JI28,,/J7
i,?S\L\]1a+a<^^J$:G<OQTQabFr/   querykeyattention_maskc                 $   UR                   nU R                  (       a   UR                  5       nUR                  5       nUcV  [        R                  " UR
                  S   UR
                  S   UR
                  S   UR                   UR                  S9nSnOUnSn[        R                  " UUUR                  SS5      UU R                  S9nAU R                  (       a  UR                  5       nUR                  SS9nAUR                  U5      nU$ )a  
Compute the attention scores.

Args:
    query (`torch.Tensor`): The query tensor.
    key (`torch.Tensor`): The key tensor.
    attention_mask (`torch.Tensor`, *optional*): The attention mask to use.

Returns:
    `torch.Tensor`: The attention probabilities/scores.
r   r   r   r   )betaalphar   )r   upcast_attentionfloatr1   emptyr   r   baddbmm	transposescaleupcast_softmaxsoftmaxto)	r5   r   r   r   r   baddbmm_inputr   attention_scoresattention_probss	            r-   get_attention_scores)AttentionModuleMixin.get_attention_scores  s       KKME))+C!!KKAA		!EKKX]XdXdM D*MD ==MM"b!**
 /557*22r2:),,U3r/   target_lengthr   c                    U R                   nUc  U$ UR                  S   nXb:w  a  UR                  R                  S:X  a_  UR                  S   UR                  S   U4n[        R
                  " XqR                  UR                  S9n[        R                  " X/SS9nO[        R                  " USU4SS	9nUS
:X  a'  UR                  S   X5-  :  a  UR                  USS9nU$ US:X  a!  UR                  S5      nUR                  USS9nU$ )a  
Prepare the attention mask for the attention computation.

Args:
    attention_mask (`torch.Tensor`): The attention mask to prepare.
    target_length (`int`): The target length of the attention mask.
    batch_size (`int`): The batch size for repeating the attention mask.
    out_dim (`int`, *optional*, defaults to `3`): Output dimension.

Returns:
    `torch.Tensor`: The prepared attention mask.
r   mpsr   r   r   r   r           )rm   r      )r   r   r   typer1   zerosr   r   Fpadrepeat_interleave	unsqueeze)	r5   r   r   r   r   r   current_lengthpadding_shapepaddings	            r-   prepare_attention_mask+AttentionModuleMixin.prepare_attention_mask  s!    JJ	!!!,2226*$$))U2 "0!5!5a!8.:N:Nq:QS` a++m;O;OXfXmXmn!&N+D!!L "#~=7IQT!Ua<##A&)??!/!A!A)QR!A!S
 	 \+55a8N+==iQ=ONr/   encoder_hidden_statesc                 f   U R                   c   S5       e[        U R                   [        R                  5      (       a  U R                  U5      nU$ [        U R                   [        R                  5      (       a7  UR                  SS5      nU R                  U5      nUR                  SS5      nU$  e)z
Normalize the encoder hidden states.

Args:
    encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

Returns:
    `torch.Tensor`: The normalized encoder hidden states.
zGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr   r   )
norm_crossr<   r2   	LayerNorm	GroupNormr   )r5   r   s     r-   norm_encoder_hidden_states/AttentionModuleMixin.norm_encoder_hidden_states  s     *u,uu*door||44$(OO4I$J! %$ 66 %:$C$CAq$I!$(OO4I$J!$9$C$CAq$I! %$ 5r/   )r   r8   r   r   r   )F)NFN)r   ) rJ   rX   rY   rZ   _default_processor_cls_available_processorsrM   r   r   r;   boolr%   r0   rs   ry   tupler   r   r   r1   no_gradrN   rT   intr   Tensorr   r   r   r   r   r\   rW   r/   r-   rL   rL   y   s   !#'9 #d #("D "EY "4S 424 2D 2" 9=	2!%2 cDj#o.52
 
20 ^b%77;%7KSVZ?%7	%7N ]]_@& @&D ]]_' '2&c &d &.    s 5<< 4 ]a-\\-(--FKllUYFY-	-` ab)#ll);>)LO)Z])	)V% %QVQ]Q] %r/   rL   ffhidden_states	chunk_dim
chunk_sizec                    UR                   U   U-  S:w  a  [        SUR                   U    SU S35      eUR                   U   U-  n[        R                  " UR	                  XBS9 Vs/ s H
  oP" U5      PM     snUS9nU$ s  snf )Nr   z)`hidden_states` dimension to be chunked: z$ has to be divisible by chunk size: z[. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`.r   )r   rB   r1   r   chunk)r	  r
  r  r  
num_chunks	hid_slice	ff_outputs          r-   _chunked_feed_forwardr  
  s    9%
2a778K8KI8V7WW{  }G  |H  Hc  d
 	
 $$Y/:=J		(5(;(;J(;(VW(V9I(VWI  	Xs   )Bc                      ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\R                  S	\R                  S
\R                  4S jr	Sr
U =r$ )GatedSelfAttentionDensei  aX  
A gated self-attention dense layer that combines visual features and object features.

Parameters:
    query_dim (`int`): The number of channels in the query.
    context_dim (`int`): The number of channels in the context.
    n_heads (`int`): The number of heads to use for attention.
    d_head (`int`): The number of channels in each head.
	query_dimcontext_dimn_headsd_headc                   > [         TU ]  5         [        R                  " X!5      U l        [        XUS9U l        [        USS9U l        [        R                  " U5      U l
        [        R                  " U5      U l        U R                  S[        R                  " [        R                  " S5      5      5        U R                  S[        R                  " [        R                  " S5      5      5        SU l        g )N)r  r   dim_headgegluactivation_fn
alpha_attnr   alpha_denseT)super__init__r2   r   linearr   attnFeedForwardr	  r   norm1norm2register_parameter	Parameterr1   r   enabled)r5   r  r  r  r  rI   s        r-   r!   GatedSelfAttentionDense.__init__%  s     ii7	6R	iw?\\),
\\),
bll5<<;L.MNr||ELL<M/NOr/   rq   objsr   c                    U R                   (       d  U$ UR                  S   nU R                  U5      nXR                  R	                  5       U R                  U R                  [        R                  " X/SS95      5      S S 2S U2S S 24   -  -   nXR                  R	                  5       U R                  U R                  U5      5      -  -   nU$ )Nr   r   )r)  r   r"  r  tanhr#  r%  r1   r   r  r	  r&  )r5   rq   r+  n_visuals       r-   forwardGatedSelfAttentionDense.forward6  s    ||H771:{{4 $$&4::eii	WX>Y3Z)[\]_h`h_hjk\k)lll  %%'$''$**Q-*@@@r/   )r#  r)  r	  r"  r%  r&  )rJ   rX   rY   rZ   __doc__r  r!  r1   r  r/  r\   __classcell__rI   s   @r-   r  r    sT    # C # s "
 
U\\ 
ell 
 
r/   r  c                     ^  \ rS rSrSr   SS\S\S\S\S\S-  S	\4U 4S
 jjjrSS\S-  S\4S jjr	 SS\
R                  S\
R                  S\
R                  S\\\4   S-  S\\
R                  \
R                  4   4
S jjrSrU =r$ )JointTransformerBlockiC  a  
A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3.

Reference: https://huggingface.co/papers/2403.03206

Parameters:
    dim (`int`): The number of channels in the input and output.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    context_pre_only (`bool`): Boolean to determine if we should add some blocks associated with the
        processing of `context` conditions.
Nr   num_attention_headsattention_head_dimcontext_pre_onlyqk_normuse_dual_attentionc                   > [         T	U ]  5         X`l        X@l        U(       a  SOSnU(       a  [	        U5      U l        O[        U5      U l        US:X  a  [        XSSSSS9U l        O&US:X  a  [        U5      U l        O[        SU S	35      e[        [        S
5      (       a  [        5       nO[        S5      e[        US UUUUUSUUSS9U l        U(       a  [        US UUUSUUSS9	U l        OS U l        [         R"                  " USSS9U l        ['        XSS9U l        U(       d+  [         R"                  " USSS9U l        ['        XSS9U l        OS U l        S U l        S U l        SU l        g )Nada_norm_continousada_norm_zeroFư>T
layer_norm)elementwise_affineepsr   	norm_typezUnknown context_norm_type: z>, currently only support `ada_norm_continous`, `ada_norm_zero`scaled_dot_product_attentionzYThe current PyTorch version does not support the `scaled_dot_product_attention` function.)r  cross_attention_dimadded_kv_proj_dimr  r   r   r8  r   r8   r9  rA  )	r  rD  r  r   r   r   r8   r9  rA  r@  rA  gelu-approximate)r   dim_outr  r   )r   r!  r:  r8  r   r%  r   r   norm1_contextrB   r(   r   r   r   r#  attn2r2   r   r&  r$  r	  norm2_context
ff_context_chunk_size
_chunk_dim)
r5   r   r6  r7  r8  r9  r:  context_norm_typer8   rI   s
            r-   r!  JointTransformerBlock.__init__R  s    	"4 04D0/.s3DJ)#.DJ 44!7U4S_"D /1!1#!6D-.?-@@~  1455-/Ik   $!'%-
	 "$(+)#
DJ DJ\\#%TJ
#BTU!#ceQU!VD)cN`aDO!%D"DO  r/   r  c                     Xl         X l        g r  rM  rN  r5   r  r   s      r-   set_chunk_feed_forward,JointTransformerBlock.set_chunk_feed_forward      %r/   r
  r   tembjoint_attention_kwargsr   c                 h   U=(       d    0 nU R                   (       a  U R                  XS9u  pVpxpnOU R                  XS9u  pVpxn	U R                  (       a  U R                  X#5      nOU R                  X#S9u  ppnU R                  " SUUS.UD6u  nnUR                  S5      U-  nUU-   nU R                   (       a-  U R                  " SSW
0UD6nWR                  S5      U-  nUU-   nU R                  U5      nUSUS S 2S 4   -   -  US S 2S 4   -   nU R                  b,  [        U R                  XPR                  U R                  5      nOU R                  U5      nU	R                  S5      U-  nUU-   nU R                  (       a  S nX!4$ WR                  S5      U-  nUU-   nU R                  U5      nUSWS S 2S 4   -   -  WS S 2S 4   -   nU R                  b,  [        U R                  XR                  U R                  5      nOU R                  U5      nUWR                  S5      U-  -   nX!4$ )N)emb)r
  r   r   r
  rW   )r:  r%  r8  rI  r#  r   rJ  r&  rM  r  r	  rN  rK  rL  )r5   r
  r   rW  rX  norm_hidden_statesgate_msa	shift_mlp	scale_mlpgate_mlpnorm_hidden_states2	gate_msa2r   
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpattn_outputcontext_attn_outputattn_output2r  context_ff_outputs                         r-   r/  JointTransformerBlock.forward  s    "8!=2""kokuku lv lh)_h LP::Vc:KnH)  )-););<Q)X&[_[m[m% \n \X&Kj
 ,099 ,
,"<,
 %,
(( ((+k9%3""::b4GbKabL$..q1L@L)L8M!ZZ6/1yD7I3IJYWXZ^W^M__'-dgg7I??\`\l\lmI 23I&&q)I5	%	1   $(!  %33 #-"6"6q"9<O"O$9<O$O!)-););<Q)R&)Cq;WXZ^W^K_G_)`cnoprvovcw)w&+$9OO%?RVRbRb%! %)OO4N$O!$9J<P<PQR<SVg<g$g!$33r/   )rN  rM  r#  rJ  r8  r	  rL  r%  rI  r&  rK  r:  )FNFr   r  )rJ   rX   rY   rZ   r1  r  r  r0   r!  rT  r1   FloatTensorr4   r   r  r  r/  r\   r2  r3  s   @r-   r5  r5  C  s    $ "'"#(OO !O  	O
 O tO !O Odt #  9=C4((C4  %00C4 	C4
 !%S#X 5C4 
u||U\\)	*C4 C4r/   r5  c            -         ^  \ rS rSrSr                    S(S\S\S\S\S-  S\S	\S-  S
\S\S\S\S\S\S\S\S\S\S-  S\S-  S\S-  S\S-  S\S-  S\S\4,U 4S jjjr	S)S\S-  S\4S jjr
       S*S\R                  S\R                  S-  S\R                  S-  S \R                  S-  S!\R                  S-  S"\\\4   S#\R                  S-  S$\\\R                  4   S-  S%\R                  4S& jjrS'rU =r$ )+BasicTransformerBlocki  ah  
A basic Transformer block.

Parameters:
    dim (`int`): The number of channels in the input and output.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
    activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
    num_embeds_ada_norm (:
        obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`.
    attention_bias (:
        obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
    only_cross_attention (`bool`, *optional*):
        Whether to use only cross-attention layers. In this case two cross attention layers are used.
    double_self_attention (`bool`, *optional*):
        Whether to use two self-attention layers. In this case no cross attention layers are used.
    upcast_attention (`bool`, *optional*):
        Whether to upcast the attention computation to float32. This is useful for mixed precision training.
    norm_elementwise_affine (`bool`, *optional*, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    norm_type (`str`, *optional*, defaults to `"layer_norm"`):
        The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
    final_dropout (`bool` *optional*, defaults to False):
        Whether to apply a final dropout after the last feed-forward layer.
    attention_type (`str`, *optional*, defaults to `"default"`):
        The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
    positional_embeddings (`str`, *optional*, defaults to `None`):
        The type of positional embeddings to apply to.
    num_positional_embeddings (`int`, *optional*, defaults to `None`):
        The maximum number of positional embeddings to apply.
Nr   r6  r7  rD  r  num_embeds_ada_normattention_biasonly_cross_attentiondouble_self_attentionr   norm_elementwise_affinerB  norm_epsfinal_dropoutattention_typepositional_embeddingsnum_positional_embeddings-ada_norm_continous_conditioning_embedding_dimada_norm_biasff_inner_dimff_biasattention_out_biasc                   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xl        Xl	        Xl
        UU l        UU l        Xl        US L=(       a    US:H  U l        US L=(       a    US:H  U l        US:H  U l        US:H  U l        US:H  U l        US;   a  Uc  ['        SU SU S	35      eXl        Xpl        U(       a  Uc  ['        S
5      eUS:X  a  [-        UUS9U l        OS U l        US:X  a  [1        X5      U l        OMUS:X  a  [5        X5      U l        O6US:X  a  [7        UUUUUS5      U l        O[8        R:                  " XUS9U l        [=        UUUUUU	(       a  UOS UUS9U l        Uc  U
(       an  US:X  a  [1        X5      U l         O8US:X  a  [7        UUUUUS5      U l         O[8        R:                  " XU5      U l         [=        UU
(       d  UOS UUUUUUS9U l!        O1US:X  a  [8        R:                  " XU5      U l         OS U l         S U l!        US:X  a  [7        UUUUUS5      U l"        O0US;   a  [8        R:                  " XU5      U l"        OUS:X  a  S U l"        [G        UUUUUUS9U l$        US:X  d  US:X  a  [K        XX#5      U l&        US:X  a6  [8        RN                  " [P        RR                  " SU5      US-  -  5      U l*        S U l+        SU l,        g )Nr=  ada_normada_norm_singler?  ada_norm_continuousr  r=  `norm_type` is set to w, but `num_embeds_ada_norm` is not defined. Please make sure to define `num_embeds_ada_norm` if setting `norm_type` to r'   \If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined.
sinusoidalmax_seq_lengthrms_normrF  r  r   r  dropoutr   rD  r   out_biasr  rD  r   r  r  r   r   r  )r=  r  r?  layer_norm_i2vgenr  r  ru  	inner_dimr   gatedzgated-text-image   g      ?r   )-r   r!  r   r6  r7  r  rD  r  rp  rr  rs  rw  rx  rq  use_ada_layer_norm_zerouse_ada_layer_normuse_ada_layer_norm_singleuse_layer_normuse_ada_layer_norm_continuousrB   rB  ro  r   	pos_embedr   r%  r   r   r2   r   r   attn1r&  rJ  norm3r$  r	  r  fuserr(  r1   r   scale_shift_tablerM  rN  )r5   r   r6  r7  r  rD  r  ro  rp  rq  rr  r   rs  rB  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  rI   s                           r-   r!  BasicTransformerBlock.__init__  sL   4 	#6 "4#6 *,%:"'>$%:")B&$8! )<4(G'iYZiMi$#6d#B"_	U_H_)26G)G&'<7-6:O-O*55:M:U( 4KKT+UVX 
 ##6  &?&Gn  !L0:3OhiDN!DN 
"%c?DJ/))#CDJ///='DJ c[cdDJ%'7K 3QU-'	

 *.C J&)#C
333A+!
  \\#9PQ
"?T$7Z^)+#!1+	DJ --\\#9PQ
!
DJ --/='DJ EEc5LMDJ--DJ''"
 W$:L(L0K^sDJ ))%'\\%++a2ES2P%QD"  r/   r  c                     Xl         X l        g r  rR  rS  s      r-   rT  ,BasicTransformerBlock.set_chunk_feed_forward  rV  r/   r
  r   r   encoder_attention_masktimestepcross_attention_kwargsclass_labelsadded_cond_kwargsr   c	                    Ub(  UR                  SS 5      b  [        R                  S5        UR                  S   n	U R                  S:X  a  U R                  X5      n
OU R                  S:X  a  U R                  XXqR                  S9u  ppnOU R                  S;   a  U R                  U5      n
OU R                  S:X  a  U R                  XS	   5      n
OnU R                  S
:X  aS  U R                  S    UR                  U	SS5      -   R                  SSS9u  nnppU R                  U5      n
U
SU-   -  U-   n
O[        S5      eU R                  b  U R                  U
5      n
Ub  UR                  5       O0 nUR                  SS 5      nU R                  " U
4U R                  (       a  UOS US.UD6nU R                  S:X  a  WR!                  S5      U-  nOU R                  S
:X  a  WU-  nUU-   nUR"                  S:X  a  UR%                  S5      nUb  U R'                  UUS   5      nU R(                  b  U R                  S:X  a  U R+                  X5      n
OeU R                  S;   a  U R+                  U5      n
OCU R                  S
:X  a  Un
O0U R                  S:X  a  U R+                  XS	   5      n
O[        S5      eU R                  b!  U R                  S
:w  a  U R                  U
5      n
U R(                  " U
4UUS.UD6nUU-   nU R                  S:X  a  U R-                  XS	   5      n
O!U R                  S
:X  d  U R-                  U5      n
U R                  S:X  a  U
SWS S 2S 4   -   -  WS S 2S 4   -   n
U R                  S
:X  a  U R+                  U5      n
U
SW-   -  W-   n
U R.                  b,  [1        U R2                  XR4                  U R.                  5      nOU R3                  U
5      nU R                  S:X  a  WR!                  S5      U-  nOU R                  S
:X  a  WU-  nUU-   nUR"                  S:X  a  UR%                  S5      nU$ )Nr   SPassing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.r   r  r=  )hidden_dtype)r?  r  r  pooled_text_embr  r  r   r   r   zIncorrect norm usedgligenr   r   r   r+  )r=  r?  r  zIncorrect norm)getr`   warningr   rB  r%  r   r  r   r  rB   r  copyr=   r  rq  r   r   squeezer  rJ  r&  r  rM  r  r	  rN  )r5   r
  r   r   r  r  r  r  r  r   r[  r\  r]  r^  r_  	shift_msa	scale_msagligen_kwargsrf  r  s                       r-   r/  BasicTransformerBlock.forward  sd    "-%))'48Dtu #((+
>>Z'!%M!D^^.KO::DWDW LV LH) ^^BB!%M!:^^44!%MM^;_!`^^00&&t,x/?/?
Ar/RReA1eo KIy(y "&M!:!3q9}!E	!Q233>>%!%0B!C CYBd!7!<!<!>jl.228TBjj
;?;T;T"7Z^)
 %	
 >>_,",,Q/+=K^^00"[0K#m3")11!4M $ JJ}mF6KLM ::!~~+%)ZZ%H"#WW%)ZZ%>"#44 &3"#88%)ZZQb?c%d" !122~~)dnn@Q.Q%)^^4F%G"**"&;5 )	K (-7M >>22!%MM^;_!`#44!%M!:>>_,!3q9QW;M7M!NQZ[\^b[bQc!c>>..!%M!:!3q9}!E	!Q'-dgg7I??\`\l\lmI 23I>>_, **1-	9I^^00 9,I!M1")11!4Mr/   )rN  rM  r  rp  r7  r  rJ  rD  r   rr  r  r	  r  r%  r&  r  rs  rB  r6  ro  rx  rq  r  rw  r  r  r  r  r  r  )r   Nr  NFFFFTr?  h㈵>FdefaultNNNNNTTrk  )NNNNNNN)rJ   rX   rY   rZ   r1  r  r0   r  r   r!  rT  r1   r  
LongTensorr4   r   r/  r\   r2  r3  s   @r-   rn  rn    sO    N *.$*.$%*&+!&(,%#',004DH$(#'#'1ff !f  	f !4Zf f !4Zf f #f  $f f "&f f f  !f" #f$  #Tz%f& $':'f( 8;Tz)f* Tz+f, Dj-f. /f0 !1f fPt #  /3596:,01504<@x||x t+x  %||d2	x
 !&t 3x ""T)x !%S#Xx &&-x  U\\ 12T9x 
x xr/   rn  c            
       Z   ^  \ rS rSrSr  SS\S\S\S-  S\S-  4U 4S jjjrS	 rS
r	U =r
$ )LuminaFeedForwardi;  a  
A feed-forward layer.

Parameters:
    hidden_size (`int`):
        The dimensionality of the hidden layers in the model. This parameter determines the width of the model's
        hidden representations.
    intermediate_size (`int`): The intermediate dimension of the feedforward layer.
    multiple_of (`int`, *optional*): Value to ensure hidden dimension is a multiple
        of this value.
    ffn_dim_multiplier (float, *optional*): Custom multiplier for hidden
        dimension. Defaults to None.
Nr   r  multiple_offfn_dim_multiplierc                   > [         TU ]  5         Ub  [        XB-  5      nX2U-   S-
  U-  -  n[        R                  " UUSS9U l        [        R                  " UUSS9U l        [        R                  " UUSS9U l        [        5       U l	        g )Nr   Fr   )
r   r!  r  r2   r   linear_1linear_2linear_3r   silu)r5   r   r  r  r  rI   s        r-   r!  LuminaFeedForward.__init__J  s     	).:;I$;a$?K#OP			

 		

 		

 J	r/   c                     U R                  U R                  U R                  U5      5      U R                  U5      -  5      $ r  )r  r  r  r  )r5   rq   s     r-   r/  LuminaFeedForward.forwardh  s1    }}TYYt}}Q'784==;KKLLr/   )r  r  r  r  )   N)rJ   rX   rY   rZ   r1  r  r   r!  r/  r\   r2  r3  s   @r-   r  r  ;  sT    $ #&+/  4Z	
 "DL <M Mr/   r  c                      ^  \ rS rSrSr SS\S\S\S\S\S-  4
U 4S	 jjjrS
\S-  4S jr SS\R                  S\S\R                  S-  S\R                  4S jjr
SrU =r$ )TemporalBasicTransformerBlockil  a  
A basic Transformer block for video like data.

Parameters:
    dim (`int`): The number of channels in the input and output.
    time_mix_inner_dim (`int`): The number of channels for temporal attention.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
Nr   time_mix_inner_dimr6  r7  rD  c                   > [         TU ]  5         X:H  U l        [        R                  " U5      U l        [        UUSS9U l        [        R                  " U5      U l        [        UUUS S9U l
        Ub-  [        R                  " U5      U l        [        UUUUS9U l        OS U l        S U l        [        R                  " U5      U l        [        USS9U l        S U l        S U l        g )Nr  )rH  r  )r  r   r  rD  )r  rD  r   r  r  )r   r!  is_resr2   r   norm_inr$  ff_inr%  r   r  r&  rJ  r  r	  rM  rN  )r5   r   r  r6  r7  rD  rI   s         r-   r!  &TemporalBasicTransformerBlock.__init__y  s     	/||C( !&!

 \\"45
(%' $	

 * &89DJ",$7)+	DJ DJDJ \\"45
0H  r/   r  c                     Xl         SU l        g )Nr   rR  )r5   r  kwargss      r-   rT  4TemporalBasicTransformerBlock.set_chunk_feed_forward  s    %r/   r
  
num_framesr   r   c                    UR                   S   nUR                   u  pVnXR-  nUS S S 24   R                  XBXg5      nUR                  SSSS5      nUR                  XF-  X'5      nUnU R                  U5      nU R                  b,  [        U R                  XR                  U R                  5      nOU R                  U5      nU R                  (       a  X-   nU R                  U5      n	U R                  U	S S9n
X-   nU R                  b$  U R                  U5      n	U R                  XS9n
X-   nU R                  U5      n	U R                  b,  [        U R                  XR                  U R                  5      nOU R                  U	5      nU R                  (       a  X-   nOUnUS S S 24   R                  XFX'5      nUR                  SSSS5      nUR                  XB-  Xg5      nU$ )Nr   r   r   r   )r   )r   r   r   r  rM  r  r  rN  r  r%  r  rJ  r&  r  r	  )r5   r
  r  r   r   batch_frames
seq_lengthchannelsresidualr[  rf  r  s               r-   r/  %TemporalBasicTransformerBlock.forward  s    #((+
-:-@-@*(!/
%dAg.66zzd%--aAq9%--j.Ez\ ]3'1$**m__^b^n^noM JJ}5M;;)4M!ZZ6jj!34jP#3 ::!!%M!:**%7*eK'7M "ZZ6'-dgg7I??\`\l\lmI 23I;;%5M%M%dAg.66zzd%--aAq9%--j.Ez\r/   )rN  rM  r  rJ  r	  r  r  r%  r&  r  r  r  )rJ   rX   rY   rZ   r1  r  r!  rT  r1   r  r/  r\   r2  r3  s   @r-   r  r  l  s    	" +/33  3 !	3
  3 !4Z3 3jt  6:	7||7 7  %||d2	7
 
7 7r/   r  c                   d   ^  \ rS rSr    SS\S\S\S\S\S\S-  S	\S
\4U 4S jjjrS rSrU =r	$ )SkipFFTransformerBlocki  Nr   r6  r7  kv_input_dimkv_input_dim_proj_use_biasrD  rp  r}  c
           
        > [         T
U ]  5         XA:w  a  [        R                  " XAU5      U l        OS U l        [        US5      U l        [        UUUUUUU	S9U l        [        US5      U l	        [        UUUUUUU	S9U l
        g )Nr>  )r  r   r  r  r   rD  r  )r  rD  r   r  r  r   r  )r   r!  r2   r   	kv_mapperr   r%  r   r  r&  rJ  )r5   r   r6  r7  r  r  r  rD  rp  r}  rI   s             r-   r!  SkipFFTransformerBlock.__init__  s     	YY|:TUDN!DNS%(
%' 3'

 S%(
 3%''

r/   c                 <   Ub  UR                  5       O0 nU R                  b%  U R                  [        R                  " U5      5      nU R	                  U5      nU R
                  " U4SU0UD6nXQ-   nU R                  U5      nU R                  " U4SU0UD6nXQ-   nU$ )Nr   )r  r  r   r  r%  r  r&  rJ  )r5   r
  r   r  r[  rf  s         r-   r/  SkipFFTransformerBlock.forward  s    BXBd!7!<!<!>jl>>%$(NN166:O3P$Q!!ZZ6jj
"7
 %
 $3!ZZ6jj
"7
 %
 $3r/   )r  rJ  r  r%  r&  )r   NFT)
rJ   rX   rY   rZ   r  r  r!  r/  r\   r2  r3  s   @r-   r  r    sx     *.$#'(
(
 !(
  	(

 (
 %)(
 !4Z(
 (
 !(
 (
T r/   r  c            /         ^  \ rS rSrSr                    S*S\S\S\S\S\S-  S	\S
\S-  S\S\S\S\S\S\S\S\S\S-  S\S-  S\S-  S\S\S\S\S\4.U 4S jjjr	S\S\
\\\4      4S jrS+S\S\S\
\   4S jjr S+S\S\S\SS4S  jjrS,S!\S-  S\SS4S" jjr    S-S#\R"                  S$\R"                  S-  S%\R"                  S-  S&\R"                  S-  S'\\\4   S\R"                  4S( jjrS)rU =r$ ).FreeNoiseTransformerBlocki6  a  
A FreeNoise Transformer block.

Parameters:
    dim (`int`):
        The number of channels in the input and output.
    num_attention_heads (`int`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`):
        The number of channels in each head.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    cross_attention_dim (`int`, *optional*):
        The size of the encoder_hidden_states vector for cross attention.
    activation_fn (`str`, *optional*, defaults to `"geglu"`):
        Activation function to be used in feed-forward.
    num_embeds_ada_norm (`int`, *optional*):
        The number of diffusion steps used during training. See `Transformer2DModel`.
    attention_bias (`bool`, defaults to `False`):
        Configure if the attentions should contain a bias parameter.
    only_cross_attention (`bool`, defaults to `False`):
        Whether to use only cross-attention layers. In this case two cross attention layers are used.
    double_self_attention (`bool`, defaults to `False`):
        Whether to use two self-attention layers. In this case no cross attention layers are used.
    upcast_attention (`bool`, defaults to `False`):
        Whether to upcast the attention computation to float32. This is useful for mixed precision training.
    norm_elementwise_affine (`bool`, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    norm_type (`str`, defaults to `"layer_norm"`):
        The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
    final_dropout (`bool` defaults to `False`):
        Whether to apply a final dropout after the last feed-forward layer.
    attention_type (`str`, defaults to `"default"`):
        The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
    positional_embeddings (`str`, *optional*):
        The type of positional embeddings to apply to.
    num_positional_embeddings (`int`, *optional*, defaults to `None`):
        The maximum number of positional embeddings to apply.
    ff_inner_dim (`int`, *optional*):
        Hidden dimension of feed-forward MLP.
    ff_bias (`bool`, defaults to `True`):
        Whether or not to use bias in feed-forward MLP.
    attention_out_bias (`bool`, defaults to `True`):
        Whether or not to use bias in attention output project layer.
    context_length (`int`, defaults to `16`):
        The maximum number of frames that the FreeNoise block processes at once.
    context_stride (`int`, defaults to `4`):
        The number of frames to be skipped before starting to process a new batch of `context_length` frames.
    weighting_scheme (`str`, defaults to `"pyramid"`):
        The weighting scheme to use for weighting averaging of processed latent frames. As described in the
        Equation 9. of the [FreeNoise](https://huggingface.co/papers/2310.15169) paper, "pyramid" is the default
        setting used.
Nr   r6  r7  r  rD  r  ro  rp  rq  rr  r   rs  rB  rt  ru  rw  rx  r{  r|  r}  context_lengthcontext_strideweighting_schemec                   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xl        Xl	        Xl
        UU l        UU l        Xl        U R                  UUU5        US L=(       a    US:H  U l        US L=(       a    US:H  U l        US:H  U l        US:H  U l        US:H  U l        US;   a  Uc  [)        SU SU S	35      eXl        Xpl        U(       a  Uc  [)        S
5      eUS:X  a  [/        UUS9U l        OS U l        [2        R4                  " XUS9U l        [9        UUUUUU	(       a  UOS UUS9U l        Uc  U
(       a:  [2        R4                  " XU5      U l        [9        UU
(       d  UOS UUUUUUS9U l        [A        UUUUUUS9U l!        [2        R4                  " XU5      U l"        S U l#        SU l$        g )Nr=  r  r  r?  r  r  r  r  r'   r  r  r  rF  r  r  r  r   )%r   r!  r   r6  r7  r  rD  r  rp  rr  rs  rw  rx  rq  set_free_noise_propertiesr  r  r  r  r  rB   rB  ro  r   r  r2   r   r%  r   r  r&  rJ  r$  r	  r  rM  rN  )r5   r   r6  r7  r  rD  r  ro  rp  rq  rr  r   rs  rB  rt  ru  rw  rx  r{  r|  r}  r  r  r  rI   s                           r-   r!  "FreeNoiseTransformerBlock.__init__n  s   4 	#6 "4#6 *,%:"'>$%:")B&$8!&&~~GWX )<4(G'iYZiMi$#6d#B"_	U_H_)26G)G&'<7-6:O-O*55:M:U( 4KKT+UVX 
 ##6  &?&Gn  !L0:3OhiDN!DN \\#W_`
%'7K 3QU-'	

 *.Cc5LMDJ"?T$7Z^)+#!1+	DJ ''"
 \\#1HI
  r/   r  r   c                     / n[        SXR                  -
  S-   U R                  5       H/  nUn[        XU R                  -   5      nUR	                  XE45        M1     U$ )Nr   r   )ranger  r  minappend)r5   r  frame_indicesiwindow_start
window_ends         r-   _get_frame_indices,FreeNoiseTransformerBlock._get_frame_indices  sb    q*':'::Q>@S@STALZT-@-@)@AJ  ,!;< U r/   c                    US:X  a  S/U-  nU$ US:X  a`  US-  S:X  a)  US-  n[        [        SUS-   5      5      nX3S S S2   -   nU$ US-   S-  n[        [        SU5      5      nX4/-   US S S2   -   n U$ US:X  a`  US-  S:X  a-  US-  nS	/US-
  -  U/-   nU[        [        USS5      5      -   nU$ US-   S-  nS	/U-  nU[        [        USS5      5      -   n U$ [        S
U 35      e)Nflatg      ?pyramidr   r   r   r   delayed_reverse_sawtoothg{Gz?z'Unsupported value for weighting_scheme=)listr  rB   )r5   r  r  weightsmids        r-   _get_frame_weights,FreeNoiseTransformerBlock._get_frame_weights  sL   v%ej(G8 5 *A~" AouQa01!DbDM1* % "A~!+uQ}-!E/GDbDM9   !;;A~" Ao&C!G,u4!DsAr):$;;  "A~!+&3,!DsAr):$;;  FGWFXYZZr/   c                 (    Xl         X l        X0l        g r  )r  r  r  )r5   r  r  r  s       r-   r  3FreeNoiseTransformerBlock.set_free_noise_properties  s     -, 0r/   r  c                     Xl         X l        g r  rR  rS  s      r-   rT  0FreeNoiseTransformerBlock.set_chunk_feed_forward  rV  r/   r
  r   r   r  r  c                 z   Ub(  UR                  SS 5      b  [        R                  S5        Ub  UR                  5       O0 nUR                  nUR
                  n	UR                  S5      n
U R                  U
5      nU R                  U R                  U R                  5      n[        R                  " XU	S9R                  S5      R                  S5      nUS   S   U
:H  nU(       dU  XR                  :  a  [        SU
< SU R                  < 35      eXS   S   -
  nUR                  XR                  -
  U
45        [        R                   " SU
S4US	9n[        R"                  " U5      n[%        U5       GH  u  nu  nn[        R&                  " US S 2UU24   5      nUU-  nUS S 2UU24   nU R)                  U5      nU R*                  b  U R+                  U5      nU R,                  " U4U R.                  (       a  UOS US
.UD6nUU-   nUR0                  S:X  a  UR3                  S5      nU R4                  bZ  U R7                  U5      nU R*                  b!  U R8                  S:w  a  U R+                  U5      nU R4                  " U4UUS
.UD6nUU-   nU[;        U5      S-
  :X  aQ  U(       dJ  US S 2W* S 24==   US S 2U* S 24   US S 2U* S 24   -  -  ss'   US S 2U* S 24==   US S 2U* 4   -  ss'   GM  US S 2UU24==   UU-  -  ss'   US S 2UU24==   U-  ss'   GM     [        R<                  " [?        URA                  U R                  SS9URA                  U R                  SS95       VVs/ s H$  u  nn[        RB                  " US:  UU-  U5      PM&     snnSS9RE                  U	5      nU RG                  U5      nU RH                  b-  [K        U RL                  UU RN                  U RH                  5      nOU RM                  U5      nUU-   nUR0                  S:X  a  UR3                  S5      nU$ s  snnf )Nr   r  r   r   r   r   zExpected num_frames=z1 to be greater or equal than self.context_length=)r   r  r   r  r   )(r  r`   r  r  r   r   sizer  r  r  r  r1   r   r   rB   r  r   
zeros_like	enumerate	ones_liker%  r  r  rq  r   r  rJ  r&  rB  r@   r   zipsplitwherer   r  rM  r  r	  rN  )r5   r
  r   r   r  r  argsr  r   r   r  r  frame_weightsis_last_frame_batch_completelast_frame_batch_lengthnum_times_accumulatedaccumulated_valuesr  frame_start	frame_endr  hidden_states_chunkr[  rf  accumulated_splitnum_times_splitr  s                              r-   r/  !FreeNoiseTransformerBlock.forward  s    "-%))'48DtuBXBd!7!<!<!>jl %%##"''*
//
;//0C0CTEZEZ[]OYYZ[\ffgij'4R'8';z'I$
 ,/// #8ZM9kW[WjWjVl!mnn&03DQ3G&G#  */B/B"BJ!OP %Q
A,>v N"--m<+4]+C'A'Y oo&;A{9?T<T&UVG}$G"/;y3H0H"I "&,?!@~~)%)^^4F%G"**"?C?X?X&;^b- )	K #.0C"C"''1,&9&A&A!&D# zz%%)ZZ0C%D">>-$..DU2U)-8J)K&"jj&*?#9 -	 '24G&G#C&**3O"1'>&>&?#?@',C+C+D(DEPQTkSkSlPlHmm@ &a*A)A)B&BCwqSjRjOjGkkC"1k)&;#;<@SV]@]]<%aY)>&>?7J?c ,D| 		 ;>&,,T-@-@a,H)//0C0C/K;;6% Oa/1B_1TVgh; 	
 "U) 	 "ZZ6'-dgg7I4??\`\l\lmI 23I!M1")11!4M-s   2+P7
)rN  rM  r  rp  r7  r  rJ  r  r  rD  r   rr  r  r	  r%  r&  r  rs  rB  r6  ro  rx  rq  r  rw  r  r  r  r  r  r  )r   Nr  NFFFFTr?  r  FNNNTT   r   r  )r  rk  )NNNN)rJ   rX   rY   rZ   r1  r  r   r0   r  r!  r  r  r  r  r  rT  r1   r  r4   r   r/  r\   r2  r3  s   @r-   r  r  6  st   4v *.$*.$%*&+!&(,%#,004#'#'  )1pp !p  	p
 p !4Zp p !4Zp p #p  $p p "&p p p  !p"  #Tz#p$ $':%p& Dj'p( )p* !+p, -p. /p0 1p pdS T%S/5J S C X\]bXc B QZ1!1361JM1	1t # d  /3596:15{||{ t+{  %||d2	{
 !&t 3{ !%S#X{ 
{ {r/   r  c                      ^  \ rS rSrSr       SS\S\S-  S\S\S\S	\S
\4U 4S jjjr	S\
R                  S\
R                  4S jrSrU =r$ )r$  i  a  
A feed-forward layer.

Parameters:
    dim (`int`): The number of channels in the input.
    dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`.
    mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension.
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
    final_dropout (`bool` *optional*, defaults to False): Apply a final dropout.
    bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
Nr   rH  multr  r  ru  r   c	                   > [         T
U ]  5         Uc  [        X-  5      nUb  UOUnUS:X  a
  [        XUS9n	US:X  a  [        XSUS9n	ODUS:X  a  [	        XUS9n	O3US:X  a  [        XUS9n	O"US:X  a  [        XUS9n	OUS	:X  a  [        XUS
S9n	[        R                  " / 5      U l
        U R                  R                  W	5        U R                  R                  [        R                  " U5      5        U R                  R                  [        R                  " XrUS95        U(       a0  U R                  R                  [        R                  " U5      5        g g )Ngelur  rG  r-  )approximater   r  zgeglu-approximateswigluzlinear-silur  )r   
activation)r   r!  r  r   r   r   r   r   r2   
ModuleListnetr  Dropoutr   )r5   r   rH  r  r  r  ru  r  r   act_fnrI   s             r-   r!  FeedForward.__init__  s     	CJI$0'cF"#t4F..#f4HFg%35F11$S$?Fh&C6Fm+%c4FSF==$

7+,		)4@AHHOOBJJw/0 r/   r
  r   c                     [        U5      S:  d  UR                  SS 5      b  Sn[        SSU5        U R                   H  nU" U5      nM     U$ )Nr   r   zThe `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.z1.0.0)r@   r  r   r  )r5   r
  r  r  deprecation_messager"   s         r-   r/  FeedForward.forward  sQ    t9q=FJJw5A #Ugw(;<hhF"=1M r/   )r  )Nr   r   r  FNT)rJ   rX   rY   rZ   r1  r  r   r0   r  r!  r1   r  r/  r\   r2  r3  s   @r-   r$  r$    s      #$#&1&1 t&1 	&1
 &1 &1 &1 &1 &1PU\\ u||  r/   r$  )8typingr   r   r1   torch.nnr2   torch.nn.functional
functionalr   utilsr   r   utils.import_utilsr   r	   r
   utils.torch_utilsr   activationsr   r   r   r   r   r   attention_processorr   r   r   
embeddingsr   normalizationr   r   r   r   r   r   r   
get_loggerrJ   r`   r   rL   r3   r  r  r  r  r5  rn  r  r  r  r  r$  rW   r/   r-   <module>r.     s   !     & f f 4 Y Y U U 5 q q D 
		H	%O, O,dN% N%bbii  QT be  &bii & &R h4BII h4 h4V HBII H HV
.M		 .Mb ~BII ~ ~BERYY EP X		 X Xv
<")) <r/   