
    
3j:                     h   S SK rS SKrS SKJr  SSKJrJr  SSKJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  \" 5       (       a  S SKJr   " S S\R8                  5      r " S S\R8                  5      r " S S\R8                  5      r " S S\R8                  5      r  " S S\R8                  5      r! " S S5      r" " S S5      r# " S S\5      r$ " S S \R8                  5      r% " S! S"\R8                  5      r& " S# S$\R8                  5      r' " S% S&\\\	5      r(g)'    N   )ConfigMixinregister_to_config)FromOriginalModelMixin)is_torchvision_available   )FeedForward)dispatch_attention_fn)	Attention)	Timesteps)Transformer2DModelOutput)
ModelMixin)RMSNorm)
transformsc                      ^  \ rS rSr SS\S\S\\\\4   S\SS4
U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )CosmosPatchEmbed#   in_channelsout_channels
patch_sizebiasreturnNc                    > [         TU ]  5         X0l        [        R                  " XS   -  US   -  US   -  X$S9U l        g )Nr      r   r   )super__init__r   nnLinearproj)selfr   r   r   r   	__class__s        j/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_cosmos.pyr   CosmosPatchEmbed.__init__$   sB     	$IIkqM9JqMIJWXMY[gs	    hidden_statesc           
          UR                   u  p#pEnU R                  u  pxn	UR                  X#XG-  XuU-  XU	-  U	5      nUR                  SSSSSSSS5      R	                  SS5      nU R                  U5      nU$ )	Nr   r         r   r         )shaper   reshapepermuteflattenr    )
r!   r&   
batch_sizenum_channels
num_framesheightwidthp_tp_hp_ws
             r#   forwardCosmosPatchEmbed.forward,   s    >K>Q>Q;
*e#%--j&7mS[^R^`c
 &--aAq!Q1EMMaQRS		-0r%   )r   r    )T)__name__
__module____qualname____firstlineno__inttupleboolr   torchTensorr8   __static_attributes____classcell__r"   s   @r#   r   r   #   sj    bftt.1t?DS#s]?St[_t	t tU\\ ell  r%   r   c                   r   ^  \ rS rSrS\S\SS4U 4S jjrS\R                  S\R                  4S jrS	r	U =r
$ )
CosmosTimestepEmbedding7   in_featuresout_featuresr   Nc                    > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " 5       U l        [        R                  " USU-  SS9U l        g )NFr   r   )r   r   r   r   linear_1SiLU
activationlinear_2)r!   rI   rJ   r"   s      r#   r    CosmosTimestepEmbedding.__init__8   sF    		+%H'')		,L0@uMr%   	timestepsc                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ N)rL   rN   rO   )r!   rQ   embs      r#   r8   CosmosTimestepEmbedding.forward>   s1    mmI&ooc"mmC 
r%   )rN   rL   rO   r:   r;   r<   r=   r>   r   rA   rB   r8   rC   rD   rE   s   @r#   rG   rG   7   sA    NC Ns Nt N %,,  r%   rG   c                      ^  \ rS rSrS\S\SS4U 4S jjrS\R                  S\R                  S\R                  4S	 jr	S
r
U =r$ )CosmosEmbeddingE   embedding_dimcondition_dimr   Nc                    > [         TU ]  5         [        USSS9U l        [	        X5      U l        [        USSS9U l        g )NT        )flip_sin_to_cosdownscale_freq_shiftư>epselementwise_affine)r   r   r   	time_projrG   
t_embedderr   norm)r!   rZ   r[   r"   s      r#   r   CosmosEmbedding.__init__F   s;    "=$]`a1-OMtM	r%   r&   timestepc                     U R                  U5      R                  U5      nU R                  U5      nU R                  U5      nXE4$ rS   )rd   type_asre   rf   )r!   r&   rh   timesteps_projtembembedded_timesteps         r#   r8   CosmosEmbedding.forwardM   sA    199-H~. IIn5&&r%   )rf   re   rd   )r:   r;   r<   r=   r>   r   rA   rB   
LongTensorr8   rC   rD   rE   s   @r#   rX   rX   E   sQ    Nc N# N$ N'U\\ 'U=M=M 'RWR^R^ ' 'r%   rX   c            	          ^  \ rS rSrS\S\SS4U 4S jjr SS\R                  S\R                  S	\R                  S-  S\R                  4S
 jjrSr	U =r
$ )CosmosAdaLayerNormT   rI   hidden_featuresr   Nc                   > [         TU ]  5         Xl        [        R                  " 5       U l        [        R                  " USSS9U l        [        R                  " XSS9U l	        [        R                  " USU-  SS9U l
        g )NFr`   rc   rb   r   r   )r   r   rZ   r   rM   rN   	LayerNormrf   r   rL   rO   r!   rI   rs   r"   s      r#   r   CosmosAdaLayerNorm.__init__U   s_    ('')LLDQ			+UK		/1{?Or%   r&   rm   rl   c                 6   U R                  U5      nU R                  U5      nU R                  U5      nUb  X#SS SU R                  -  24   -   nUR	                  SSS9u  pEU R                  U5      nUR                  S:X  a  S XE4 5       u  pEUSU-   -  U-   nU$ )N.r   dimc              3   B   #    U  H  oR                  S 5      v   M     g7fr   N	unsqueeze.0xs     r#   	<genexpr>-CosmosAdaLayerNorm.forward.<locals>.<genexpr>l   s     CNqKKNNN   r   )rN   rL   rO   rZ   chunkrf   ndim)r!   r&   rm   rl   shiftscales         r#   r8   CosmosAdaLayerNorm.forward^   s     !OO,=> MM*;< MM*;< 1>VDDVDV@V>V9V4W W(..qb.9		-0!!Q&CUNCLE%U3e;r%   )rN   rZ   rL   rO   rf   rS   rV   rE   s   @r#   rq   rq   T   sj    PC P# P$ P im"\\>CllRWR^R^aeRe	 r%   rq   c            	          ^  \ rS rSrSS\S\S-  SS4U 4S jjjr SS\R                  S\R                  S	\R                  S-  S\R                  4S
 jjrSr	U =r
$ )CosmosAdaLayerNormZeror   NrI   rs   r   c                 8  > [         TU ]  5         [        R                  " USSS9U l        [        R
                  " 5       U l        Uc  [        R                  " 5       U l        O[        R                  " XSS9U l        [        R                  " USU-  SS9U l
        g )NFr`   ru   r   r   )r   r   r   rv   rf   rM   rN   IdentityrL   r   rO   rw   s      r#   r   CosmosAdaLayerNormZero.__init__s   sl    LLDQ	'')"KKMDMIIkODM		/1{?Or%   r&   rm   rl   c                    U R                  U5      nU R                  U5      nU R                  U5      nUb  X#-   nUR                  SSS9u  pEnU R	                  U5      nUR
                  S:X  a  S XEU4 5       u  pEnUSU-   -  U-   nX4$ )Nr   rz   r{   r   c              3   B   #    U  H  oR                  S 5      v   M     g7fr~   r   r   s     r#   r   1CosmosAdaLayerNormZero.forward.<locals>.<genexpr>   s     !O:NQ++a..:Nr   r   )rN   rL   rO   r   rf   r   )r!   r&   rm   rl   r   r   gates          r#   r8   CosmosAdaLayerNormZero.forward   s     !OO,=> MM*;< MM*;< 1 8.44QB4?d		-0!!Q&!O5:N!OE$%U3e;""r%   )rN   rL   rO   rf   rS   rV   rE   s   @r#   r   r   r   sv    PC P#* PPT P P" %)	#||# !<<# llT!	#
 
# #r%   r   c                       \ rS rSrS r   SS\S\R                  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  4S
 jjrSr	g)CosmosAttnProcessor2_0   c                 l    [        [        R                  R                  S5      (       d  [	        S5      eg )Nscaled_dot_product_attentionzVCosmosAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.hasattrrA   r   
functionalImportErrorr!   s    r#   r   CosmosAttnProcessor2_0.__init__   s,    uxx**,JKKvww Lr%   Nattnr&   encoder_hidden_statesattention_maskimage_rotary_embr   c           	         Uc  UnUR                  U5      nUR                  U5      nUR                  U5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  U5      nUR                  U5      nUb  SSKJ	n	  U	" XeSSS9nU	" XuSSS9n[        R                  R                  5       (       a  [        R                  " UR                  S5      UR                  S	9n
[        R                  " UR                  S5      UR                  S	9n[        R                  " UR                  S5      UR                  S	9nO3UR                  S5      n
UR                  S5      nUR                  S5      nUR!                  X-  SS
9nUR!                  X-  SS
9n[#        UR                  SS5      UR                  SS5      UR                  SS5      USSS9nUR%                  SS5      R'                  U5      nUR(                  S   " U5      nUR(                  S   " U5      nU$ )Nr   rz   r   apply_rotary_embTuse_realuse_real_unbind_dimr   devicer{   r]   F	attn_mask	dropout_p	is_causalr   )to_qto_kto_v	unflattenheads	transposenorm_qnorm_k
embeddingsr   rA   onnxis_in_onnx_exporttensorsizer   repeat_interleaver
   r/   rj   to_out)r!   r   r&   r   r   r   querykeyvaluer   	query_idxkey_idx	value_idxs                r#   __call__CosmosAttnProcessor2_0.__call__   s=    !($1!		-(ii-.		/0DJJ#34>>q!DmmA

B/0::1a@DJJ#34>>q!D E"kk# '5$UtacdE"34]_`C ::''))UZZ]5<<HIll388A;szzBGUZZ]5<<HI

1IhhqkG

1I##I$8a#@''	(>A'F .OOAq!MM!QOOAq!$
 &--a3;;EBA}5A}5r%    )NNN)
r:   r;   r<   r=   r   r   rA   rB   r   rC   r   r%   r#   r   r      sz    x 6:.20488 ||8  %||d2	8
 t+8  ,,-8 
8 8r%   r   c                       \ rS rSrS r SS\S\R                  S\\R                  \R                  4   S\\R                  \R                  4   S\R                  4
S	 jjr	S
r
g)CosmosAttnProcessor2_5   c                 l    [        [        R                  R                  S5      (       d  [	        S5      eg )Nr   zTCosmosAttnProcessor2_5 requires PyTorch 2.0. Please upgrade PyTorch to 2.0 or newer.r   r   s    r#   r   CosmosAttnProcessor2_5.__init__   s,    uxx**,JKKtuu Lr%   Nr   r&   r   r   r   c           	      (	   [        U[        5      (       d  [        S5      eU(       a  UOSu  pgU(       a  UOSu  pUc  UnUR                  U5      n
UR	                  U5      nUR                  U5      nU
R                  SUR                  S45      R                  SS5      n
UR                  SUR                  S45      R                  SS5      nUR                  SUR                  S45      R                  SS5      nUR                  U
5      n
UR                  U5      nUb  SSKJn  U" XSSS	9n
U" XSSS	9n[        R                  R                  5       (       a  [        R                   " U
R#                  S
5      U
R$                  S9n[        R                   " UR#                  S
5      UR$                  S9n[        R                   " UR#                  S
5      UR$                  S9nO3U
R#                  S
5      nUR#                  S
5      nUR#                  S
5      nUR'                  X-  S
S9nUR'                  UU-  S
S9n[)        U
R                  SS5      UR                  SS5      UR                  SS5      USSS9nUR+                  SS
5      R-                  U
5      nUGb  UR/                  U5      nUR1                  U5      nUR3                  U5      nUR4                  S   nUR6                  UR                  -  nUR9                  USUR                  U5      R                  SS5      nUR9                  USUR                  U5      R                  SS5      nUR9                  USUR                  U5      R                  SS5      nUR;                  U5      nUR=                  U5      nUR#                  S
5      nUR#                  S
5      nUR#                  S
5      nUR'                  UU-  S
S9nUR'                  UU-  S
S9n[)        UR                  SS5      UR                  SS5      UR                  SS5      U	SSS9nUR+                  SS
5      R-                  U5      nUU-   nOUnUR>                  S   " U5      nUR>                  S   " U5      nU$ )NzDExpected encoder_hidden_states as (text_context, img_context) tuple.)NNr   rz   r   r   Tr   r   r   r   r{   r]   Fr   r   ) 
isinstancer?   
ValueErrorr   r   r   r   r   r   r   r   r   r   rA   r   r   r   r   r   r   r
   r/   rj   q_imgk_imgv_imgr,   out_dimview
q_img_norm
k_img_normr   )r!   r   r&   r   r   r   text_contextimg_context	text_maskimg_maskr   r   r   r   r   r   r   attn_outr   r   r   r0   dim_head	q_img_idx	k_img_idx	v_img_idximg_outs                              r#   r   CosmosAttnProcessor2_5.__call__   s    /77cdd=R$9Xd!0>nL	(L		-(ii%		,'DJJ#34>>q!DmmA

B/0::1a@DJJ#34>>q!DE"kk#'5$UtacdE"34]_`C::''))UZZ]5<<HIll388A;szzBGUZZ]5<<HI

1IhhqkG

1I##I$8a#@''	Y(>A'F(OOAq!MM!QOOAq!
 ##Aq)11%8"JJ}-EJJ{+EJJ{+E&,,Q/J||tzz1HJJz2tzz8DNNqRSTEJJz2tzz8DNNqRSTEJJz2tzz8DNNqRSTEOOE*EOOE*E

1I

1I

1I++I,B+JE++I,B+JE+1%1%1%"G ooa+33E:G$w.M$MA}5A}5r%   r   rS   )r:   r;   r<   r=   r   r   rA   rB   r?   r   rC   r   r%   r#   r   r      sz    v [[ ||[  %U\\5<<%?@	[
 ellELL89[ 
[ [r%   r   c            	          ^  \ rS rSrU 4S jr S
S\R                  S\\R                  \R                  4   S\R                  S-  S\R                  4U 4S jjjrS	r	U =r
$ )CosmosAttentioni:  c                 (  > [         TU ]  " U0 UD6  U R                  U R                  R                  -  U R                  -  n[
        R                  " U R                  USS9U l        [
        R                  " U R                  USS9U l	        [
        R                  " U R                  USS9U l
        [        U R                  R                  U R                  -  SSS9U l        [        U R                  R                  U R                  -  SSS9U l        g )NFr   r`   Tra   )r   r   r   r   rJ   r   r   	query_dimr   r   r   r   r   r   r   )r!   argskwargs	inner_dimr"   s       r#   r   CosmosAttention.__init__;  s    $)&) JJ!7!774::E	YYt~~yuE
YYt~~yuE
YYt~~yuE
!$))"8"8DJJ"FDeij!$))"8"8DJJ"FDeijr%   Nr&   r   r   r   c                 ,   > [         TU ]  " SUUUS.UD6$ )N)r&   r   r   r   )r   r8   )r!   r&   r   r   cross_attention_kwargsr"   s        r#   r8   CosmosAttention.forwardF  s.     w 
'"7)	

 %
 	
r%   )r   r   r   r   r   rS   )r:   r;   r<   r=   r   rA   rB   r?   r8   rC   rD   rE   s   @r#   r   r   :  sa    	k /3	
||
  %U\\5<<%?@
 t+	
 

 
r%   r   c                   &  ^  \ rS rSr       SS\S\S\S\S\S\S\S	\S
\S\SS4U 4S jjjr       SS\	R                  S\	R                  S-  \\	R                  S-  \	R                  S-  4   -  S\	R                  S\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S\	R                  \\	R                  \	R                  4   -  4S jjrSrU =r$ )CosmosTransformerBlockiV  num_attention_headsattention_head_dimcross_attention_dim	mlp_ratioadaln_lora_dimqk_normout_biasr   before_proj
after_projr   Nc                   > [         TU ]  5         X-  n[        XS9U l        Xl        [        US UUUSU[        5       S9U l        [        XS9U l        U(       a  [        UUUUUSU[        5       S9U l        O[        UUUUUSU[        5       S9U l        [        XS9U l        [        XSUS9U l        S U l        S U l        U	(       a  ["        R$                  " X5      U l        U
(       a  ["        R$                  " X5      U l        g g )N)rI   rs   T)r   r   r   r   r   rc   r   	processorgelu)multactivation_fnr   )r   r   r   norm1r   r   r   attn1norm2r   r   attn2norm3r	   ffr   r   r   r   )r!   r   r   r   r   r   r   r   r   r   r   hidden_sizer"   s               r#   r   CosmosTransformerBlock.__init__W  s    	)>+d
&! $%'#,.	

 ,d
(%$7)+#'!02	DJ #%$7)+#'!02	DJ ,d
kV^_  !yyBD iiADO r%   r&   r   rm   rl   r   extra_pos_embr   controlnet_residuallatents	block_idxc                    U R                   b  U R                  U5      U	-   nUb  X-   nU R                  XU5      u  pU R                  XS9nXU-  -   nU R                  XU5      u  pU R	                  XUS9nXU-  -   nU R                  XU5      u  pU R                  U5      nXU-  -   nUb  U R                  b   eX-  nU R                  b  Ub   eU R                  U5      nX4$ U$ )N)r   )r   r   )r   r   r  r  r  r  r  r   )r!   r&   r   rm   rl   r   r  r   r	  r
  r  norm_hidden_statesr   attn_output	ff_outpuths_projs                   r#   r8   CosmosTransformerBlock.forward  s+    ' ,,];gEM$)9M $(::mPT#U jj!3jW%{(:: $(::mPT#U jj\j ! 
 &{(:: $(::mPT#U GG./	%y(88*??***0M??&&...oom4G ))r%   )	r   r  r  r   r  r   r   r  r  )      @   rms_normFFFF)NNNNNNN)r:   r;   r<   r=   r>   floatstrr@   r   rA   rB   r?   r8   rC   rD   rE   s   @r#   r   r   V  s    !!!! ?B ?B  ?B !	?B
 ?B ?B ?B ?B ?B ?B ?B 
?B ?BL %)04-1.237'+ $.||.  %||d2U5<<$;NPUP\P\_cPc;c5dd. !<<	.
 llT!.  ,,-. ||d*. t+. #\\D0. $. :. 
ellELL89	9. .r%   r   c                      ^  \ rS rSr    SS\S\\\\4   S\\\\4   S\S\\\\4   SS4U 4S	 jjjrSS
\R                  S\S-  S\\R                  \R                  4   4S jjr
SrU =r$ )CosmosRotaryPosEmbedi  r  max_sizer   base_fps
rope_scaler   Nc                   > [         TU ]  5         [        X#5       VVs/ s H	  u  pgXg-  PM     snnU l        X0l        X@l        US-  S-  U l        US-  S-  U l        XR                  -
  U R                  -
  U l        US   U R                  U R                  S-
  -  -  U l	        US   U R                  U R                  S-
  -  -  U l
        US   U R                  U R                  S-
  -  -  U l        g s  snnf )Nr)   r   r   r   )r   r   zipr  r   r  dim_hdim_wdim_th_ntk_factorw_ntk_factort_ntk_factor)	r!   r  r  r   r  r  r   patchr"   s	           r#   r   CosmosRotaryPosEmbed.__init__  s     	:=h:ST:S;4:ST$  A%)
 A%)
 ::-

:
&qMdjjDJJN.KL&qMdjjDJJN.KL&qMdjjDJJN.KL Us   C3r&   fpsc                    UR                   u  p4pVnXPR                  S   -  X`R                  S   -  XpR                  S   -  /nUR                  n	SU R                  -  n
SU R                  -  nSU R
                  -  n[        R                  " [        U R                  5      U	[        R                  S9n[        R                  " SU R                  SU	[        R                  S9S U R                  S-   U R                  -  n[        R                  " SU R                  SU	[        R                  S9S U R                  S-   U R                  -  n[        R                  " SU R                  SU	[        R                  S9S U R                  S-   U R                  -  nSX-  -  nSX-  -  nSUU-  -  n[        R                  " US US    U5      S S S 2S S S 24   R                  US   SUS   S5      n[        R                  " US US    U5      S S S S 2S S 24   R                  US   US   SS5      nUc  [        R                  " US US    U5      nO-[        R                  " US US    U-  U R                   -  U5      nUS S 2S S S S 24   R                  SUS   US   S5      n[        R"                  " UUU/S-  SS9R%                  SS5      R'                  5       n[        R(                  " U5      n[        R*                  " U5      nUU4$ )	Nr   r   r   g     @)r   dtype      ?rz   r{   )r,   r   r   r!  r"  r#  rA   arangemaxr  float32r  r  r   outerrepeatr  catr/   r  cossin)r!   r&   r&  r0   r1   r2   r3   r4   pe_sizer   h_thetaw_thetat_thetaseqdim_h_rangedim_w_rangedim_t_rangeh_spatial_freqsw_spatial_freqstemporal_freqsemb_hemb_wemb_tfreqsr0  r1  s                             r#   r8   CosmosRotaryPosEmbed.forward  s   >K>Q>Q;
*e!33Vq?Q5QSX\k\klm\nSno%%D---D---D---ll3t}}-fEMMRLLDJJ&NObRVR\R\`aRacfjfpfpp 	 LLDJJ&NObRVR\R\`aRacfjfpfpp 	 LLDJJ&NObRVR\R\`aRacfjfpfpp 	 !56!56 45C'!*-?aq@PQXXY`abYcefhopqhrtuvC'!*-?dAq@PQXXY`abYcelmneoqrtuv ;KKLgaj 1>BE KKLgaj 1C 7$-- GXEatQ&'..q'!*gaj!L		5%/!3<DDQJPPRiiiiCxr%   )	r  r  r   r  r!  r  r   r#  r"  )      rD  r   r   r      g       @r)  r)  rS   r:   r;   r<   r=   r>   r?   r  r   rA   rB   r8   rC   rD   rE   s   @r#   r  r    s     *9+41@MM S#&M #sC-(	M
 M %-.M 
M M,&U\\ &d
 &eTYT`T`bgbnbnTnNo & &r%   r  c                      ^  \ rS rSr SS\S\\\\4   S\\\\4   S\SS4
U 4S jjjrS	\R                  S\R                  4S
 jr
SrU =r$ )CosmosLearnablePositionalEmbedi	  r  r  r   rb   r   Nc                   > [         TU ]  5         [        X#5       VVs/ s H	  u  pVXV-  PM     snnU l        X0l        X@l        [        R                  " [        R                  " U R                  S   U5      5      U l
        [        R                  " [        R                  " U R                  S   U5      5      U l        [        R                  " [        R                  " U R                  S   U5      5      U l        g s  snnf )Nr   r   r   )r   r   r  r  r   rb   r   	ParameterrA   zeros	pos_emb_t	pos_emb_h	pos_emb_w)r!   r  r  r   rb   r   r$  r"   s          r#   r   'CosmosLearnablePositionalEmbed.__init__
  s     	:=h:ST:S;4:ST$ekk$--2BK&PQekk$--2BK&PQekk$--2BK&PQ Us   C;r&   c           	      &   UR                   u  p#pEnX@R                  S   -  XPR                  S   -  X`R                  S   -  /nU R                  S US    S S S 2S S S S 24   R                  USUS   US   S5      nU R                  S US    S S S S 2S S S 24   R                  X'S   SUS   S5      n	U R
                  S US    S S S S S 2S S 24   R                  X'S   US   SS5      n
X-   U
-   nUR                  SS5      n[        R                  R                  USS[        R                  S9n[        R                  " U R                  U[        R                  " UR                  5       UR                  5       -  5      S9nX-  R!                  U5      $ )	Nr   r   r   r   rz   T)r|   keepdimr(  )alpha)r,   r   rN  r.  rO  rP  r/   rA   linalgvector_normr,  addrb   npsqrtnumelrj   )r!   r&   r0   r1   r2   r3   r4   r2  r?  r=  r>  rT   rf   s                r#   r8   &CosmosLearnablePositionalEmbed.forward  s   >K>Q>Q;
*e!33Vq?Q5QSX\k\klm\nSno|,T1dD!-CDKKJXY[bcd[egnopgqstu|,T4D!-CDKKJ`aXbdegnopgqstu|,T4q!-CDKKJ`aXbdklmdnpqstume#kk!Q||''T'Wyy4rwwtzz|ciik7Q/RS
##M22r%   )rb   r  r   rO  rN  rP  )r`   rH  rE   s   @r#   rJ  rJ  	  s     RR S#&R #sC-(	R
 R 
R R"3U\\ 3ell 3 3r%   rJ  c            ,         ^  \ rS rSrSrSr/ SQrS/rS/r\	                    S)S\
S	\
S
\
S\
S\
S\S\
S\
S\\
\
\
4   S\\
\
\
4   S\\\\4   S\S\S-  S\S\
S\
S\
S-  S\
S-  S\
S\
SS4*U 4S jjj5       r      S*S\R"                  S\R"                  S \R"                  S!\\R"                     S-  S"\R"                  S-  S#\
S-  S$\R"                  S-  S%\R"                  S-  S&\S\\R"                     \-  4S' jjrS(rU =r$ )+CosmosTransformer3DModeli*  a	  
A Transformer model for video-like data used in [Cosmos](https://github.com/NVIDIA/Cosmos).

Args:
    in_channels (`int`, defaults to `16`):
        The number of channels in the input.
    out_channels (`int`, defaults to `16`):
        The number of channels in the output.
    num_attention_heads (`int`, defaults to `32`):
        The number of heads to use for multi-head attention.
    attention_head_dim (`int`, defaults to `128`):
        The number of channels in each attention head.
    num_layers (`int`, defaults to `28`):
        The number of layers of transformer blocks to use.
    mlp_ratio (`float`, defaults to `4.0`):
        The ratio of the hidden layer size to the input size in the feedforward network.
    text_embed_dim (`int`, defaults to `4096`):
        Input dimension of text embeddings from the text encoder.
    adaln_lora_dim (`int`, defaults to `256`):
        The hidden dimension of the Adaptive LayerNorm LoRA layer.
    max_size (`tuple[int, int, int]`, defaults to `(128, 240, 240)`):
        The maximum size of the input latent tensors in the temporal, height, and width dimensions.
    patch_size (`tuple[int, int, int]`, defaults to `(1, 2, 2)`):
        The patch size to use for patchifying the input latent tensors in the temporal, height, and width
        dimensions.
    rope_scale (`tuple[float, float, float]`, defaults to `(2.0, 1.0, 1.0)`):
        The scaling factor to use for RoPE in the temporal, height, and width dimensions.
    concat_padding_mask (`bool`, defaults to `True`):
        Whether to concatenate the padding mask to the input latent tensors.
    extra_pos_embed_type (`str`, *optional*, defaults to `learnable`):
        The type of extra positional embeddings to use. Can be one of `None` or `learnable`.
    controlnet_block_every_n (`int`, *optional*):
        Interval between transformer blocks that should receive control residuals (for example, `7` to inject after
        every seventh block). Required for Cosmos Transfer2.5.
    img_context_dim_in (`int`, *optional*):
        The dimension of the input image context feature vector, i.e. it is the D in [B, N, D].
    img_context_num_tokens (`int`):
        The number of tokens in the image context feature vector, i.e. it is the N in [B, N, D]. If
        `img_context_dim_in` is not provided, then this parameter is ignored.
    img_context_dim_out (`int`):
        The output dimension of the image context projection layer. If `img_context_dim_in` is not provided, then
        this parameter is ignored.
T)patch_embedfinal_layerrf   r   learnable_pos_embedNr   r   r   r   
num_layersr   text_embed_dimr   r  r   r  concat_padding_maskextra_pos_embed_typeuse_crossattn_projectioncrossattn_proj_in_channelsencoder_hidden_states_channelscontrolnet_block_every_nimg_context_dim_inimg_context_num_tokensimg_context_dim_outr   c                 2  > [         TU ]  5         X4-  nU(       a  US-   OUn[        UUU
SS9U l        [	        XIXS9U l        S U l        US:X  a  [        UU	U
S9U l        [        UU5      U l	        [        R                  " [        U5       Vs/ s HI  n[        UUUUUSSU R                  R                  S L=(       a    U R                  R                  S:  S	9PMK     sn5      U l        [#        UU5      U l        [        R&                  " UU
S   U
S   -  U
S
   -  U-  SS9U l        U R                  R*                  (       aC  [        R,                  " [        R&                  " UUSS9[        R.                  " 5       5      U l        SU l        U R                  R                  (       al  [        R,                  " [        R&                  " U R                  R                  U R                  R4                  SS9[        R.                  " 5       5      U l        g g s  snf )Nr   Fr   )r  r  r   r  	learnable)r  r  r   r  r   )r   r   r   r   r   r   r   r   r   T)r   r   r   r^  r  roper`  rJ  rX   
time_embedr   
ModuleListranger   configri  transformer_blocksrq   norm_outr   proj_outre  
SequentialGELUcrossattn_projgradient_checkpointingrk  img_context_proj)r!   r   r   r   r   ra  r   rb  r   r  r   r  rc  rd  re  rf  rg  rh  ri  rj  rk  r  patch_embed_in_channels_r"   s                           r#   r   !CosmosTransformer3DModel.__init__\  s   0 	)> 6I+/k+,C[R\chi )**
	 $( ;.'E'!%(D$ *+{C #%-- z* +A '(;'9(6'#1&" $ > >d J qt{{OmOmpqOq	 +#
" +;G		AA6AFU\a
 ;;//"$--		46T[_`	#D
 ',#;;))$&MM		$++88$++:Y:Y`de	%D! *;s   AHr&   rh   r   block_controlnet_hidden_statesr   r&  condition_maskpadding_maskreturn_dictc
                   ^^ ^!^" UR                   u  mppUb  [        R                  " X/SS9nU R                  R                  (       a  [
        R                  R                  U[        UR                   SS  5      [
        R                  R                  S9n[        R                  " XR                  S5      R                  TSUSS5      /SS9nUb   UR                  S5      R                  S5      nU R                  XS9nU R                  R                  (       a  U R                  U5      OS nU R                  R                   u  nnnUU-  m!UU-  m UU-  m"U R#                  U5      nUR%                  SS5      nUR&                  S:X  a  U R)                  X5      u  nnOUR&                  S:X  ab  UR                   TSUSS4:X  d   S	UR                    35       eUR%                  5       nU R)                  X5      u  nnUU U!U"4S
 jUU4 5       u  nnO[+        SUR                    35      e[-        U[.        5      (       a  UOUS 4u  nnU R                  R0                  (       a  U R3                  U5      nUb,  U R                  R4                  (       a  U R7                  U5      n[-        U[.        5      (       a  UU4OUn0 nUba  [9        U R:                  5      n[        [=        [?        SUU R                  R@                  5      5      5       VVs0 s H  u  nnUUU   _M     nnn[=        U R:                  5       Hl  u  nnURC                  U5      n[        RD                  " 5       (       a,  U RF                  (       a  U RI                  UUUUUUUUU5	      nM]  U" UUUUUUUU5      nMn     U RK                  UUU5      nU RM                  U5      nURO                  SUUUS45      nURO                  ST!T T"45      nURQ                  SSSSSSSS5      nUR%                  SS5      R%                  SS5      R%                  SS5      nU	(       d  U4$ [S        US9$ s  snnf )Nr   r{   r   )interpolationr   )r&  r   r*   z9Expected timestep to have shape [B, 1, T, 1, 1], but got c              3      >#    U  H=  nUR                  TTS S S5      R                  SSTTS5      R                  S S5      v   M?     g7f)r   rz   r   N)r   expandr/   )r   r   r0   post_patch_heightpost_patch_num_framespost_patch_widths     r#   r   3CosmosTransformer3DModel.forward.<locals>.<genexpr>  sR      ' 3A z#8!QCB 13CRHA 3s   AAz@Expected timestep to have shape [B, 1, T, 1, 1] or [T], but got r   rz   r+   r)   r(   )sample)*r,   rA   r/  rr  rc  r   r   resizelistInterpolationModeNEARESTr   r.  rn  rd  r`  r   r^  r/   r   ro  r   r   r?   re  rx  ri  rz  lenrs  	enumeraterq  rh  getis_grad_enabledry  _gradient_checkpointing_funcrt  ru  r   r.   r   )#r!   r&   rh   r   r~  r   r&  r  r  r  r1   r2   r3   r4   padding_mask_resizedr   r  r5   r6   r7   rl   rm   r   r   processed_encoder_hidden_statescontrolnet_block_index_mapn_blocksidxr  blockr	  r0   r  r  r  s#                                  @@@@r#   r8    CosmosTransformer3DModel.forward  sq    ?L>Q>Q;
Lf %!II}&E1MM;;**#-#8#8#?#?d=#6#6rs#;<JLhLhLpLp $@ $  "II > >q A H HUVXbdegh ijpqM %+55a8BB1EN  99]9<CG;;CcCc00?im ..S# *c 1"cM C<((7%--a3 ==A&*oom&N#D#]]a>>j!ZA%FF KHNNK[\F  '')H&*oom&N#D#'  12	'#D# _`h`n`n_opqq &00Eu%M%M!TikoSp 	"k ;;//..|<L"t{{'E'E//<K ,66KU+S+S\;'Ye 	(
 &(")54223H '+9U1hHlHl5m+n&o*&oNC 9#>>&o ' * !*$*A*A BIu"<"@"@"K$$&&4+F+F $ A A!3%$!"'
! !&!3%$!"'	! !C6 m5FMm4%//Cc23FG%//4IK\^n3op &--aAq!Q1E%--a3;;AqAII!QO!##'}==[*s   Q)
rx  ry  rz  r`  rt  r^  ru  rn  ro  rs  )   r      rC     r     r  rB  rE  rG  Trm  Fr  r  NNr  i   )NNNNNT)r:   r;   r<   r=   __doc__ _supports_gradient_checkpointing _skip_layerwise_casting_patterns_no_split_modules_keep_in_fp32_modulesr   r>   r  r?   r@   r  r   rA   rB   r  r   r8   rC   rD   rE   s   @r#   r]  r]  *  se   *X (,$'M$1223 #%"%"!)8+41@$(+6).*..2/3)-&)#'+QQ Q !	Q
  Q Q Q Q Q S#&Q #sC-(Q %-.Q "Q "DjQ #'Q  %(!Q" ),#Q$ #&*%Q&  $J'Q( !$)Q* !+Q, 
-Q Qp EI.2.2,0 |>|||> ,,|>  %||	|>
 )-U\\(:T(A|> t+|> 4Z|> t+|> llT)|> |> 
u||	7	7|> |>r%   r]  ))numpyrX  rA   torch.nnr   configuration_utilsr   r   loadersr   utilsr   	attentionr	   attention_dispatchr
   attention_processorr   r   r   modeling_outputsr   modeling_utilsr   normalizationr   torchvisionr   Moduler   rG   rX   rq   r   r   r   r   r   r  rJ  r]  r   r%   r#   <module>r     s       B - - # 6 + " 7 ' # &ryy (bii 'bii ' <"#RYY "#J= =@` `F
i 
8pRYY pf=299 =@3RYY 3BB>z;8N B>r%   