
    
3j/B                     R   S SK r S SKJr  S SKrS SKrS SKrS SKJrJ	r	J
r
JrJrJrJr  S SKJr  SSKJr   " S S\R$                  5      r " S	 S
\R$                  5      rSr\R,                  \R.                  \
R0                  S4S jr\R,                  4S jr\R,                  4S jrg)    N)Optional)CrossAttentionFeedForwardLTXFrequenciesPrecisionLTXRopeTypegenerate_freq_grid_npinterleaved_freqs_cissplit_freqs_cis)nn   )POS_EMBEDDING_EXP_VALUESc                   b   ^  \ rS rSrSr      SU 4S jjrSS\R                  4S jjrSr	U =r
$ )	BasicTransformerBlock1D   a  
A basic Transformer block.

Parameters:

    dim (`int`): The number of channels in the input and output.
    num_attention_heads (`int`): The number of heads to use for multi-head attention.
    attention_head_dim (`int`): The number of channels in each head.
    dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
    activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
    attention_bias (:
        obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
    upcast_attention (`bool`, *optional*):
        Whether to upcast the attention computation to float32. This is useful for mixed precision training.
    norm_elementwise_affine (`bool`, *optional*, defaults to `True`):
        Whether to use learnable elementwise affine parameters for normalization.
    standardization_norm (`str`, *optional*, defaults to `"layer_norm"`): The type of pre-normalization to use. Can be `"layer_norm"` or `"rms_norm"`.
    norm_eps (`float`, *optional*, defaults to 1e-5): Epsilon value for normalization layers.
    qk_norm (`str`, *optional*, defaults to None):
        Set to 'layer_norm' or `rms_norm` to perform query and key normalization.
    final_dropout (`bool` *optional*, defaults to False):
        Whether to apply a final dropout after the last feed-forward layer.
    ff_inner_dim (`int`, *optional*): Dimension of the inner feed-forward layer. If not provided, defaults to `dim * 4`.
    ff_bias (`bool`, *optional*, defaults to `True`): Whether to use bias in the feed-forward layer.
    attention_out_bias (`bool`, *optional*, defaults to `True`): Whether to use bias in the attention output layer.
    use_rope (`bool`, *optional*, defaults to `False`): Whether to use Rotary Position Embeddings (RoPE).
    ffn_dim_mult (`int`, *optional*, defaults to 4): Multiplier for the inner dimension of the feed-forward layer.
c
                 r   > [         T
U ]  5         [        UUUS UUUU	S9U l        [	        UUSUUU	S9U l        g )N)	query_dimheadsdim_headcontext_dimapply_gated_attentiondtypedevice
operationsT)dim_outglur   r   r   )super__init__r   attn1r   ff)selfdimn_headsd_headr   attn_precisionr   r   r   r   	__class__s             R/home/wildlama/comfy/ComfyUI/custom_nodes/ComfyUI-LTXVideo/embeddings_connector.pyr    BasicTransformerBlock1D.__init__3   sV     	 $"7!	

 !
    returnc                    [         R                  R                  R                  U5      nUR	                  S5      nU R                  XBUS9nXQ-   nUR                  S:X  a  UR	                  S5      n[         R                  R                  R                  U5      nU R                  U5      nXa-   nUR                  S:X  a  UR	                  S5      nU$ )Nr   )maskpe   )comfyldm
common_ditrms_normsqueezer   ndimr   )r    hidden_statesattention_maskr,   norm_hidden_statesattn_output	ff_outputs          r&   forwardBasicTransformerBlock1D.forwardX   s    
 #YY11::=I/77: jj!3RjP#3")11!4M #YY11::=I GG./	!1")11!4Mr(   )r   r   )NNFNNN)NN)__name__
__module____qualname____firstlineno____doc__r   torchFloatTensorr9   __static_attributes____classcell__r%   s   @r&   r   r      s:    D ##
JeFWFW  r(   r   c                      ^  \ rS rSrSrSSSSSSS/S	SS	S
S
S
S	S	4S\\   4U 4S jjjrS rS r	S r
SS jr SS\R                  S\\R                     4S jjrSrU =r$ )Embeddings1DConnectoru   T   i         g     @r   FNnum_learnable_registersc                   > [         TU ]  5         Xl        Xl        X@l        XC-  U l        Xl        X`l        Xpl        Xl	        Xl
        [        R                  " [        U5       Vs/ s H  n[        U R
                  UUUU
UUUS9PM     sn5      U l        XC-  nXl        U R                   (       a@  [        R"                  " [$        R&                  " U R                   UXS9S-  S-
  5      U l        g g s  snf )N)r   r   r   r   r   r   r   g       @      ?)r   r   r   out_channelsnum_attention_heads	inner_dimcausal_temporal_positioningpositional_embedding_thetapositional_embedding_max_pos
split_ropedouble_precision_roper   
ModuleListranger   transformer_1d_blocksrK   	Parameterr@   randlearnable_registers)r    in_channelscross_attention_dimattention_head_dimrP   
num_layersrS   rT   rR   rK   r   r   r   r   rU   rV   kwargs_rQ   r%   s                      r&   r   Embeddings1DConnector.__init__x   s   & 	
'#6 ,A+F(*D',H)$%:"%']] z* +A (NN'& 3*?!)	 +&
"  (<	'>$''')||

00)5  	(D$ (#s   &$C2c                     [         R                  " [        S5       Vs/ s H  nUS S 2U4   U R                  U   -  PM     snSS9nU$ s  snf )Nr   )r!   )r@   stackrX   rT   )r    indices_gridifractional_positionss       r&   get_fractional_positions.Embeddings1DConnector.get_fractional_positions   s]    ${{ q!A QT"T%F%Fq%II!  
 $#s   #A	c                 t   UR                   nU[        R                  [        R                  4;   a  [        R                  OUnU R                  U5      nU R                  (       a.  [        U R                  UR                  S   U R                  5      OU R                  X$UR                  5      R                  UR                  S9nUS:X  a4  XeR                  S5      -  R                  SS5      R!                  S5      nU$ XeR                  S5      S-  S-
  -  R                  SS5      R!                  S5      nU$ )Nr   r   exp_2re   rJ   )r   r@   bfloat16float16float32rj   rV   r   rS   shaperQ   generate_freq_gridr   to	unsqueeze	transposeflatten)r    rg   spacingsource_dtyper   ri   indicesfreqss           r&   precompute_freqs&Embeddings1DConnector.precompute_freqs   s&   #)) >> MM 	  $<<\J )) "//""1% ((9M9T9TU
"(//"
0 	 g99"==2r"  	 ::2>BQFG2r" 
 r(   c           	         U R                   nU R                  nSnSU-  nSnUn	US:X  a  [        R                  " [        X#S9n
OUS:X  a/  SU[        R
                  " SXGUS9U-  -  -  n
U
R                  US	9n
ORUS
:X  a  [        R                  " XXG-  X2S9n
O3US:X  a-  [        R                  " US-  U	S-  XG-  X2S9R                  5       n
W
[        R                  -  S-  n
U
$ )Nr   rJ   exprM   rn   rN   r   rm   )r   linear)r   r   sqrt)rQ   rS   r@   tensorr   arangeru   linspacer   mathpi)r    ry   r   r   r!   theta
n_pos_dimsn_elemstartendr{   s              r&   rt   (Embeddings1DConnector.generate_freq_grid   s    nn//
Zell#;5XGEell1c&&QTW&WXXGjjuj-G nnCM&G nnq#q&#-df  DGG#a'r(   c                 f   U R                   nSnU R                  X5      nU R                  (       a1  US-  nUR                  S   nXg-
  n[	        XXU R
                  5      u  pO[        XSU-  5      u  pU	R                  U R                  5      U
R                  U R                  5      U R                  4$ )NrJ   re   )	rQ   r}   rU   rs   r
   rP   r	   ru   r   )r    rg   ry   r!   r   r|   expected_freqscurrent_freqspad_sizecos_freqsin_freqs              r&   precompute_freqs_cis*Embeddings1DConnector.precompute_freqs_cis   s    nn%%l<?? AXN!KKOM%5H!0!9!9"Hh "7uFl!KH{{4::&DJJ(?PPr(   r4   r5   c                    U R                   (       Ga  UR                  S   U R                   -  S:X  d%   SUR                  S    SU R                    S35       eUR                  S   U R                   -  n[        R                  " U R                  US45      R                  UR                  5      nUR                  S5      R                  S5      R                  S5      S:  R                  5       nUSS2UR                  5       R                  5       SS24   nUR                  S   nUR                  S   U-
  n[        R                  R                  R                  USSSU4SS	9n	[        R                  " US/S
9n
X-  SU
-
  U-  -   n[        R                  " USUR                   UR                  S9n[        R"                  " UR                  S   [        R$                  UR                  S9nUSSSS24   nU R'                  U5      n[)        U R*                  5       H  u  pU" XUS9nM     [,        R.                  R0                  R3                  U5      nX4$ )ag  
The [`Transformer2DModel`] forward method.

Args:
    hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.FloatTensor` of shape `(batch size, channel, height, width)` if continuous):
        Input `hidden_states`.
    indices_grid (`torch.LongTensor` of shape `(batch size, 3, num latent pixels)`):
    attention_mask ( `torch.Tensor`, *optional*):
        An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask
        is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large
        negative values to the attention scores corresponding to "discard" tokens.
Returns:
    If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a
    `tuple` where the first element is the sample tensor.
r   r   zHidden states sequence length z. must be divisible by num_learnable_registers .re   g     N)padvalue)dimsg        rM   )r5   r,   )rK   rs   r@   tiler\   ru   r   r2   rv   intboolr   
functionalr   flip	full_liker   r   rr   r   	enumeraterY   r.   r/   r0   r1   )r    r4   r5   num_registers_duplicationsr\   attention_mask_binarynon_zero_hidden_statesnon_zero_nums
pad_lengthadjusted_hidden_statesflipped_maskrg   	freqs_cis	block_idxblocks                  r&   r9   Embeddings1DConnector.forward  s~   , ''' ##A&)E)EEJV/0C0CA0F/GGuvz  wS  wS  vT  TU  VVJ ##A&$*F*FF ' #(**((+Eq*I#b%%&  
 &&q)11!4>>rBgMce " &3(0027791<&" 388;M&,,Q/-?J%*XX%8%8%<%<&Q1j,A &= &" !::&;1#FL5|#'::; 
 #__$**%,,	N ||"%--@T@T
 $D$M2--l;	 !*$*D*D EI!M !F 		,,55mD,,r(   )rR   rV   r   rQ   r\   rP   rK   rO   rT   rS   rU   rY   )r   )N)r;   r<   r=   r>    _supports_gradient_checkpointingr   r   r   rj   r}   rt   r   r@   Tensorr9   rB   rC   rD   s   @r&   rF   rF   u   s    '+$  #*&'S$)14##!6 "*#6 6p$B6Q$ 26N-||N- !.N- N-r(   rF   zmodel.diffusion_model.c                    U R                  5        VVs0 s H,  u  pxUR                  U5      (       d  M  U[        U5      S  U_M.     n	nn[        U	5      S:X  a  g [        R                  R                  X3SS9n
[        US   US   US   UR                  SS5      UU
Ub  UOS	/U[        R                  :H  U[        R                  :H  S
9	nUR                  U	5        U$ s  snnf )Nr   T)disable_fast_fp8rP   r_   r`   r   Fr   )	rP   r_   r`   r   r   r   rT   rU   rV   )items
startswithlenr.   opspick_operationsrF   getr   SPLITr   FLOAT64load_state_dict)sdconnector_prefixconnector_configr   	rope_typefrequencies_precision
pe_max_poskvsd_connectorr   	connectors               r&   load_embeddings_connectorr   ]  s     HHJDA<<() 	&#

!"A%   <A**5$*OJ%,-BC+,@A#L1.223JER3=3IZPQs 1 1137N7V7VV
I l+-s
   CCc           	      v   [         R                  " U5      n[        R                  " U5      nUR                  SS/5      n[         S3n[         S3n[         S3U ;   a  UOUnUR                  SS5      UR                  SS	5      UR                  S
S5      UR                  SS5      S.n	[        U UU	UUUU5      $ )N&connector_positional_embedding_max_posr   zembeddings_connector.zvideo_embeddings_connector.z audio_adaln_single.linear.weightconnector_num_attention_headsrI   connector_attention_head_dimrH   connector_num_layersrJ   connector_apply_gated_attentionFrP   r_   r`   r   )r   	from_dictr   r   _PREFIX_BASEr   )
r   transformer_configr   r   r   r   video_only_connector_prefixav_connector_prefixprefixr   s
             r&   load_video_embeddings_connectorr     s    %%&89I3==>PQ#''(PSTRUVJ%1N2G"H)N*EF ^;<B 	(   255+R 
 144*C
 ),,-CQG!3!7!7-u"
 %
 r(   c           	         [         R                  " U5      n[        R                  " U5      nUR                  SS/5      nUR                  SUR                  SS5      5      UR                  SUR                  SS5      5      UR                  S	UR                  S
S5      5      UR                  SS5      S.n[	        U [
         S3UUUUU5      $ )Nr   r   #audio_connector_num_attention_headsr   rI   "audio_connector_attention_head_dimr   rH   audio_connector_num_layersr   rJ   r   Fr   zaudio_embeddings_connector.)r   r   r   r   r   r   )r   r   r   r   r   r   r   s          r&   load_audio_embeddings_connectorr     s    %%&89I3==>PQ#''(PSTRUVJ  2551""#BBG 
 1440""#A3G
 ),,(""#91=
 "4!7!7-u"
$ %
.34 r(   )r   typingr   comfy.ldm.common_ditr.   	comfy.opsr@   comfy.ldm.lightricks.modelr   r   r   r   r   r	   r
   r   pos_embedding_exp_valuesr   Moduler   rF   r   rp   INTERLEAVEDFLOAT32r   r   r    r(   r&   <module>r      s            >]bii ]@^-BII ^-J ( ..%%199D CH.. "J CH.. r(   