
    
3j}V                        S SK r S SKrS SKJr  S SKrS SKJr  SSKJrJ	r	  SSK
JrJr  SSKJrJrJrJr  SSKJr  SS	KJrJr  SS
KJrJrJr  SSKJr  SSKJr  SSKJ r   SSK!J"r"  SSK#J$r$  SSK%J&r&J'r'  \RP                  " \)5      r* " S S5      r+ " S S5      r, " S S\R                  RZ                  \5      r. " S S\RZ                  5      r/\ " S S\RZ                  5      5       r0\ " S S\$\\\\\5      5       r1S r2g)    N)Any   )ConfigMixinregister_to_config)FromOriginalModelMixinPeftAdapterMixin)apply_lora_scale	deprecateis_torch_versionlogging)maybe_allow_in_graph   )ContextParallelInputContextParallelOutput)AttentionMixinAttentionModuleMixinFeedForward)dispatch_attention_fn)
CacheMixin)PixArtAlphaTextProjection)Transformer2DModelOutput)
ModelMixin)AdaLayerNormSingleRMSNormc                       \ rS rSrS rSrg)LTXVideoAttentionProcessor2_0(   c                 6    Sn[        SSU5        [        U0 UD6$ )Nz~`LTXVideoAttentionProcessor2_0` is deprecated and this will be removed in a future version. Please use `LTXVideoAttnProcessor`r   z1.0.0)r
   LTXVideoAttnProcessor)clsargskwargsdeprecation_messages       g/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_ltx.py__new__%LTXVideoAttentionProcessor2_0.__new__)   s)     _17<OP$d5f55     N)__name__
__module____qualname____firstlineno__r%   __static_attributes__r(   r'   r$   r   r   (   s    6r'   r   c                       \ rS rSrSrSrSrS r   SSSS\R                  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  4S jjr
Srg)r   0   z
Processor for implementing attention (SDPA is used by default if you're using PyTorch 2.0). This is used in the LTX
model. It applies a normalization layer and rotary embedding on the query and key vector.
Nc                 <    [        SS5      (       a  [        S5      eg )N<z2.0zlLTX attention processors require a minimum PyTorch version of 2.0. Please upgrade your PyTorch installation.)r   
ValueError)selfs    r$   __init__LTXVideoAttnProcessor.__init__9   s$    C''~  (r'   attnLTXAttentionhidden_statesencoder_hidden_statesattention_maskimage_rotary_embreturnc                 P   Uc  UR                   OUR                   u  pgnUb<  UR                  XGU5      nUR                  XaR                  SUR                   S   5      nUc  UnUR	                  U5      n	UR                  U5      n
UR                  U5      nUR                  U	5      n	UR                  U
5      n
Ub  [        X5      n	[        X5      n
U	R                  SUR                  S45      n	U
R                  SUR                  S45      n
UR                  SUR                  S45      n[        U	U
UUSSU R                  U R                  S9nUR                  SS5      nUR                  U	R                   5      nUR"                  S   " U5      nUR"                  S   " U5      nU$ )	Nr           F)	attn_mask	dropout_p	is_causalbackendparallel_configr   r      )shapeprepare_attention_maskviewheadsto_qto_kto_vnorm_qnorm_kapply_rotary_emb	unflattenr   _attention_backend_parallel_configflattentodtypeto_out)r3   r6   r8   r9   r:   r;   
batch_sizesequence_length_querykeyvalues               r$   __call__LTXVideoAttnProcessor.__call__?   s    $9#@MF[FaFa 	'
Q %!88ZdeN+00ZZ^MaMabdMefN ($1!		-(ii-.		/0E"kk#'$U=E"39CDJJ#34mmA

B/0DJJ#34-$++ 11	
 &--a3%((5A}5A}5r'   r(   NNN)r)   r*   r+   r,   __doc__rQ   rR   r4   torchTensorr]   r-   r(   r'   r$   r   r   0   s    
  6:.20411 ||1  %||d2	1
 t+1  ,,-1 
1 1r'   r   c                     ^  \ rS rSr\r\/r         SS\S\S\S\S\S\	S	\S-  S
\	S\
4U 4S jjjr   SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  4
S jjrSrU =r$ )r7   s   N	query_dimrI   kv_headsdim_headdropoutbiascross_attention_dimout_biasqk_normc                 T  > [         TU ]  5         U	S:w  a  [        S5      eX@l        XB-  U l        Uc  U R                  OXC-  U l        Xl        Ub  UOUU l        X`l        XPl	        Xl
        X l        SnSn[        R                  R                  XB-  XS9U l        [        R                  R                  XC-  XS9U l        [        R                  R#                  XR                  US9U l        [        R                  R#                  U R                  U R
                  US9U l        [        R                  R#                  U R                  U R
                  US9U l        [        R                  R+                  / 5      U l        U R,                  R/                  [        R                  R#                  U R                  U R                  US95        U R,                  R/                  [        R                  R1                  U5      5        U
c  U R3                  5       n
U R5                  U
5        g )Nrms_norm_across_headszIOnly 'rms_norm_across_heads' is supported as a valid value for `qk_norm`.gh㈵>Tepselementwise_affine)ri   )superr4   NotImplementedErrorhead_dim	inner_diminner_kv_dimre   rj   use_biasrh   out_dimrI   ra   nnr   rM   rN   LinearrJ   rK   rL   
ModuleListrV   appendDropout_default_processor_clsset_processor)r3   re   rI   rf   rg   rh   ri   rj   rk   rl   	processornorm_epsnorm_elementwise_affine	__class__s                r$   r4   LTXAttention.__init__w   s    	--%&qrr !).6.>DNNHDW":M:Y#6_h  
"&hh&&x'7X&rhh&&x':&uHHOOI~~DOI	HHOOD$<$<d>O>OVZO[	HHOOD$<$<d>O>OVZO[	hh))"-588??4>>4<<h?WX588++G45335I9%r'   r8   r9   r:   r;   r<   c                    [        [        R                  " U R                  R                  5      R
                  R                  5       5      nUR                  5        VVs/ s H  u  pxXv;  d  M  UPM     n	nn[        U	5      S:  a:  [        R                  SU	 SU R                  R                  R                   S35        UR                  5        VV
s0 s H  u  pzXv;   d  M  Xz_M     nnn
U R                  " XX#U40 UD6$ s  snnf s  sn
nf )Nr   zattention_kwargs z are not expected by z and will be ignored.)setinspect	signaturer   r]   
parameterskeysitemslenloggerwarningr   r)   )r3   r8   r9   r:   r;   r"   attn_parameterskrY   unused_kwargsws              r$   forwardLTXAttention.forward   s     g//0G0GHSSXXZ['-||~R~tq9Q~R}!NN#M?2GH`H`HiHiGjj  A $*<<>J>41Q5I$!$>J~~d3HZjuntuu S
 Ks   D .D D"D)rj   rh   rt   rI   ru   rv   rN   rM   rx   re   rK   rV   rJ   rL   rw   )	   r   @   r?   TNTrn   Nr_   )r)   r*   r+   r,   r   r~   _available_processorsintfloatboolstrr4   ra   rb   r   r-   __classcell__r   s   @r$   r7   r7   s   s   223
 *..(&(& (& 	(&
 (& (& (& !4Z(& (& (& (&Z 6:.204v||v  %||d2v t+	v
  ,,-v 
v vr'   r7   c                     ^  \ rS rSr      SS\S\S\S\S\S\S\S	S
4U 4S jjjrS\S\S\S\S\\R                  \\4   S\R                  S	\R                  4S jr     SS\R                  S\S
-  S\S
-  S\S
-  S\\R                  \\4   S
-  S\R                  S
-  S	\\R                  \R                  4   4S jjrSrU =r$ )LTXVideoRotaryPosEmbed   dimbase_num_framesbase_height
base_width
patch_sizepatch_size_tthetar<   Nc                 v   > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xpl        g )N)	rr   r4   r   r   r   r   r   r   r   )	r3   r   r   r   r   r   r   r   r   s	           r$   r4   LTXVideoRotaryPosEmbed.__init__   s6     	.&$$(
r'   rW   
num_framesheightwidthrope_interpolation_scaledevicec                    [         R                  " U[         R                  US9n[         R                  " U[         R                  US9n[         R                  " U[         R                  US9n	[         R                  " XUSS9n
[         R                  " U
SS9n
U
R                  S5      R                  USSSS5      n
Ub  U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
S S 2SS24   US   -  U R                  -  U R                  -  U
S S 2SS24'   U
R                  SS	5      R                  SS5      n
U
$ )
N)rU   r   ij)indexingr   r   rE   r   r      )ra   arangefloat32meshgridstack	unsqueezerepeatr   r   r   r   r   rS   	transpose)r3   rW   r   r   r   r   r   grid_hgrid_wgrid_fgrids              r$   _prepare_video_coords,LTXVideoRotaryPosEmbed._prepare_video_coords   sj    fEMM&Ie5==HjfM~~fftD{{4Q'~~a ''
Aq!Q?#/1Q3<*B1*EEHYHYY\`\p\ppDAaCL1Q3<*B1*EEWZ^ZjZjjDAaCL1Q3<*B1*EEWZ^ZiZiiDAaCL||Aq!++Aq1r'   r8   video_coordsc           	         UR                  S5      nUc  U R                  UUUUUUR                  S9nOT[        R                  " US S 2S4   U R
                  -  US S 2S4   U R                  -  US S 2S4   U R                  -  /SS9nSn	U R                  n
U R                  [        R                  " [        R                  " XR                  5      [        R                  " XR                  5      U R                  S-  UR                  [        R                  S	9-  nU[        R                  -  S
-  nXR                  S5      S-  S-
  -  nUR!                  SS5      R#                  S5      nUR%                  5       R'                  SSS9nUR)                  5       R'                  SSS9nU R                  S-  S:w  a  [        R*                  " US S 2S S 2S U R                  S-  24   5      n[        R,                  " US S 2S S 2S U R                  S-  24   5      n[        R.                  " X/SS9n[        R.                  " X/SS9nX4$ )Nr   )r   r   rE   r   r>   r   g      ?   )r   rU   g       @)sizer   r   ra   r   r   r   r   r   linspacemathlogr   r   pir   r   rS   cosrepeat_interleavesin	ones_like
zeros_likecat)r3   r8   r   r   r   r   r   rW   r   startendfreqs	cos_freqs	sin_freqscos_paddingsin_paddings                   r$   r   LTXVideoRotaryPosEmbed.forward   s    #''*
--)A$++ . D ;; A&)=)== A&)9)99 A&8
 D jj

ennHHUJJ'HHS**%HHM ''--
 
 #%+a/!34B'//2IIK11!1<	IIK11!1<	88a<1//)Aq.DHHqL.4H*IJK**9Q>TXX\>5I+JKK		;":CI		;":CI##r'   )r   r   r   r   r   r   r   )      r   rE   rE        @)NNNNN)r)   r*   r+   r,   r   r   r4   tuplera   rb   r   r   r   r-   r   r   s   @r$   r   r      so     "  	
     
 (  	
  #(eU(B"C  
: "&! MQ,04$||4$ $J4$ d
	4$
 Tz4$ #(eU(B"Cd"J4$ llT)4$ 
u||U\\)	*4$ 4$r'   r   c                   4  ^  \ rS rSrSr      SS\S\S\S\S\S\S	\S
\S\S\4U 4S jjjr	  SS\
R                  S\
R                  S\
R                  S\\
R                  \
R                  4   S-  S\
R                  S-  S\
R                  4S jjrSrU =r$ )LTXVideoTransformerBlocki  ai  
Transformer block used in [LTX](https://huggingface.co/Lightricks/LTX-Video).

Args:
    dim (`int`):
        The number of channels in the input and output.
    num_attention_heads (`int`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`):
        The number of channels in each head.
    qk_norm (`str`, defaults to `"rms_norm"`):
        The normalization layer to use.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    eps (`float`, defaults to `1e-6`):
        Epsilon value for normalization layers.
r   num_attention_headsattention_head_dimrj   rl   activation_fnattention_biasattention_out_biasrp   rq   c                 :  > [         TU ]  5         [        XU
S9U l        [	        UUUUUS UUS9U l        [        XU
S9U l        [	        UUUUUUUUS9U l        [        XS9U l	        [        R                  " [        R                  " SU5      US-  -  5      U l        g )Nro   )re   rI   rf   rg   ri   rj   rk   rl   )re   rj   rI   rf   rg   ri   rk   rl   )r   r         ?)rr   r4   r   norm1r7   attn1norm2attn2r   ffry   	Parameterra   randnscale_shift_table)r3   r   r   r   rj   rl   r   r   r   rp   rq   r   s              r$   r4   !LTXVideoTransformerBlock.__init__-  s     	S>PQ
!%(' $'	

 S>PQ
! 3%(''	

 c?!#ekk!S.ACH.L!Mr'   Nr8   r9   tembr;   encoder_attention_maskr<   c                    UR                  S5      nU R                  U5      nU R                  R                  S   nU R                  S   R	                  UR
                  5      UR                  XcR                  S5      US5      -   n	U	R                  SS9u  pppUSU-   -  U
-   nU R                  US US9nUUU-  -   nU R                  UUS US9nUU-   nU R                  U5      SU-   -  U-   nU R                  U5      nUUU-  -   nU$ )	Nr   NNrE   r>   r   r   )r8   r9   r;   )r9   r;   r:   )r   r   r   rF   rT   r   reshapeunbindr   r   r   r   )r3   r8   r9   r   r;   r   rW   norm_hidden_statesnum_ada_params
ada_values	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlpattn_hidden_states	ff_outputs                     r$   r    LTXVideoTransformerBlock.forwardX  sH    #''*
!ZZ6//55a8++J7::4;;G$,,		!nbK
 

 JTIZIZ_`IZIaF	h9/1y=AIM!ZZ,"&- ( 

 &(:X(EE!ZZ"7!1	 ( 
 &(::!ZZ6!i-H9TGG./	%	H(<<r'   )r   r   r   r   r   r   )rn   gelu-approximateTTư>Fr   )r)   r*   r+   r,   r`   r   r   r   r   r4   ra   rb   r   r   r-   r   r   s   @r$   r   r     s   0 //##'#()N)N !)N  	)N
 !)N )N )N )N !)N )N !)N )N` FJ6:%||%  %||% ll	%
  ell :;dB% !&t 3% 
% %r'   r   c            "       :  ^  \ rS rSrSrSrS/rS/r\" SSSS	9\" SSSS	9\" SS
SS	9S.\" SSSS	9\" SSSS	9S.\	" SSS9S.r
\               S.S\S\S\S\S\S\S\S\S\S\S\S\S\S\S\SS4 U 4S  jjj5       r\" S!5             S/S"\R&                  S#\R&                  S$\R(                  S%\R&                  S&\S-  S'\S-  S(\S-  S)\\\\4   \R&                  -  S-  S*\R&                  S-  S!\\\4   S-  S+\S\R&                  4S, jj5       rS-rU =r$ )0LTXVideoTransformer3DModeli  a  
A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video).

Args:
    in_channels (`int`, defaults to `128`):
        The number of channels in the input.
    out_channels (`int`, defaults to `128`):
        The number of channels in the output.
    patch_size (`int`, defaults to `1`):
        The size of the spatial patches to use in the patch embedding layer.
    patch_size_t (`int`, defaults to `1`):
        The size of the tmeporal patches to use in the patch embedding layer.
    num_attention_heads (`int`, defaults to `32`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `64`):
        The number of channels in each head.
    cross_attention_dim (`int`, defaults to `2048 `):
        The number of channels for cross attention heads.
    num_layers (`int`, defaults to `28`):
        The number of layers of Transformer blocks to use.
    activation_fn (`str`, defaults to `"gelu-approximate"`):
        Activation function to use in feed-forward.
    qk_norm (`str`, defaults to `"rms_norm_across_heads"`):
        The normalization layer to use.
Tnormr   rE   r   F)	split_dimexpected_dimssplit_outputr   )r8   r9   r   )r   rE   )
gather_dimr  ) ropeproj_outin_channelsout_channelsr   r   r   r   rj   
num_layersr   rl   r   r   caption_channelsr   r   r<   Nc                 X  > [         TU ]  5         U=(       d    UnXV-  n[        R                  " UU5      U l        [        R
                  " [        R                  " SU5      US-  -  5      U l        [        USS9U l
        [        UUS9U l        [        USSSUUSS	9U l        [        R                  " [!        U5       Vs/ s H  n[#        UUUUU
U	UUUUS
9
PM     sn5      U l        [        R&                  " USSS9U l        [        R                  " UU5      U l        SU l        g s  snf )Nr   r   F)use_additional_conditions)in_featureshidden_sizer   r   r   )r   r   r   r   r   r   r   )
r   r   r   rj   rl   r   r   r   rp   rq   r  ro   )rr   r4   ry   rz   proj_inr   ra   r   r   r   
time_embedr   caption_projectionr   r
  r{   ranger   transformer_blocks	LayerNormnorm_outr  gradient_checkpointing)r3   r  r  r   r   r   r   rj   r  r   rl   r   r   r  r   r   ru   rY   r   s                     r$   r4   #LTXVideoTransformer3DModel.__init__  s,   & 	#2{'<	yyi8!#ekk!Y.G)UX..X!Y,YRWX";HXfo"p*!%
	 #%-- z* +A )!(;'9(;#"/#1'9 '> +#
$ YDUS		)\:&+#)s    D'attention_kwargsr8   r9   timestepr   r   r   r   r   r   return_dictc           	         U R                  XXgX5      nUbB  UR                  S:X  a2  SUR                  UR                  5      -
  S-  nUR	                  S5      nUR                  S5      nU R                  U5      nU R                  UR                  5       UUR                  S9u  pUR                  USUR                  S5      5      nUR                  USUR                  S5      5      nU R                  U5      nUR                  USUR                  S5      5      nU R                   HP  n[        R                  " 5       (       a)  U R                  (       a  U R                  UUUUUU5      nMF  U" UUUUUS9nMR     U R                   S   US S 2S S 2S 4   -   nUS S 2S S 2S4   US S 2S S 2S4   nnU R#                  U5      nUSU-   -  U-   nU R%                  U5      nU(       d  U4$ ['        US	9$ )
Nr   rE   g     r   )rW   hidden_dtyper>   )r8   r9   r   r;   r   r   )sample)r
  ndimrT   rU   r   r   r  r  rS   rH   r  r  ra   is_grad_enabledr  _gradient_checkpointing_funcr   r  r  r   )r3   r8   r9   r  r   r   r   r   r   r   r  r  r;   rW   r   embedded_timestepblockscale_shift_valuesshiftscaleoutputs                        r$   r   "LTXVideoTransformer3DModel.forward  s     99]Ogv "-2H2M2MQR2R&'*@*C*CMDWDW*X&X\d%d"%;%E%Ea%H""''*
]3"&//!&,, #2 #
 yyR27-22:rCTCYCYZ\C]^ $ 7 78M N 5 : ::r=K]K]^`Ka b,,E$$&&4+F+F $ A A!)$*! !&"/*?%5+A! -& "33J?BSTUWXZ^T^B__)!Q'24Fq!Qw4Oum4%U3e;}-9'v66r'   )	r  r  r  r  r  r
  r   r  r  )   r-  rE   rE       r   r      r   rn   Fr  i   TT)NNNNNNT)r)   r*   r+   r,   r`    _supports_gradient_checkpointing _skip_layerwise_casting_patterns_repeated_blocksr   r   _cp_planr   r   r   r   r   r4   r	   ra   rb   
LongTensorr   dictr   r   r-   r   r   s   @r$   r  r    sl   4 (,$(.x$23 2AQ]bc%9AUVej%k&:QVWfk&l
 $aqtT#aqtT
 *QaHH  #%"$#'/.(- $##'!=,=, =, 	=,
 =, !=,  =, !=, =, =, =, "&=, =, =, =,  !!=," 
#=, =,~ () "&! UY,026 @7||@7  %||@7 ""	@7
 !&@7 $J@7 d
@7 Tz@7 #(ue(;"<u||"Kd"R@7 llT)@7 sCx.4/@7 @7 
@7 *@7r'   r  c                 "   Uu  p#U R                  SS5      R                  S5      u  pE[        R                  " U* U/SS9R	                  S5      nU R                  5       U-  UR                  5       U-  -   R                  U R                  5      nU$ )Nr   )r>   r   r>   r   )rP   r   ra   r   rS   r   rT   rU   )xr   r   r   x_realx_imag	x_rotatedouts           r$   rO   rO   2  s|    HC[[G,33B7NFfWf-26>>qAI779s?Y__.44
8
8
ACJr'   )3r   r   typingr   ra   torch.nnry   configuration_utilsr   r   loadersr   r   utilsr	   r
   r   r   utils.torch_utilsr   _modeling_parallelr   r   	attentionr   r   r   attention_dispatchr   cache_utilsr   
embeddingsr   modeling_outputsr   modeling_utilsr   normalizationr   r   
get_loggerr)   r   r   r   Moduler7   r   r   r  rO   r(   r'   r$   <module>rL     s          B ? K K 5 L I I 6 $ 2 7 ' 7 
		H	%6 6@ @F=v588??$8 =v@c$RYY c$L cryy c cL n7^-CEUWan7 n7br'   