
    
3jd`              
       <   S SK r S SKrS SKJr  S SKrS SKJr  S SKJs  Jr	  S SKJ
r
  SSKJrJr  SSKJrJr  SSKJr  SS	KJrJr  SS
KJrJr  SSKJr  SSKJr  SSKJr  \R>                  " \ 5      r!S2S jr"S3S jr#S3S jr$S4S jr%S4S jr& S5S\
S\
S\
S\'4S jjr( " S S\RR                  5      r* " S S\RR                  5      r+ " S S\RR                  5      r, " S S\RR                  5      r- " S  S!\RR                  5      r. " S" S#\RR                  5      r/ " S$ S%5      r0 " S& S'\RR                  \5      r1 " S( S)\RR                  5      r2 " S* S+\RR                  5      r3 " S, S-\RR                  5      r4 " S. S/\RR                  5      r5 " S0 S1\\\\\\5      r6g)6    N)Any)Tensor   )ConfigMixinregister_to_config)FromOriginalModelMixinPeftAdapterMixin)logging   )AttentionMixinAttentionModuleMixin)_CAN_USE_FLEX_ATTNdispatch_attention_fn)
CacheMixin)Transformer2DModelOutput)
ModelMixinc           	          [         R                  " [        R                  " U5      * [         R                  " SU [         R
                  S9-  U -  5      nU$ )Nr   )startenddtype)torchexpmathlogarangefloat32)dim
max_periodfreqss      m/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/models/transformers/transformer_kandinsky.py	get_freqsr!   '   s@    IItxx
++ellSXS`S`.aadgghEL    c                     U(       aC  Sn[        XSXD4SS9n [        XSXD4SS9nU R                  SS5      n UR                  SS5      nX4$ U R                  SS5      n UR                  SS5      nX4$ )N      r   r   r   )local_patchingflatten)xropeshape
block_mask
pixel_sizes        r    fractal_flattenr.   ,   s    
1a%@aHdAz+FANIIaO||Aq! 7N IIaO||Aq!7Nr"   c                     U(       aC  SnU R                   " U R                  S   SUS-  /U R                  SS  Q76 n [        XSX34SS9n U $ U R                   " / UQU R                  SS  Q76 n U $ )Nr$   r   r   r%   r&   )reshaper+   local_merge)r)   r+   r,   r-   s       r    fractal_unflattenr3   9   sx    
IIaggaj"j!mBaggabkB1j"=1E H II+u+qwwqr{+Hr"   c           
         Uu  pEpgUu  pn
U R                   " / U R                  S U QXX-  PUPXi-  PU	PXz-  PU
PU R                  US-   S  Q76 n U R                  " / [        [	        U R                  S U 5      5      QUPUS-   PUS-   PUS-   PUS-   PUS-   P[        US-   [	        U R                  5      5      Q76 n U R                  X3S-   5      R                  US-   US-   5      n U $ )Nr   r      r%         r1   r+   permuterangelenr(   r)   r+   
group_sizer   
batch_sizedurationheightwidthg1g2g3s              r    r'   r'   C   sS   */'J&JBB			 			
#				 			 				
 			 			 			 
q			A 	
		 			s1774C=!	"				 	a		 	a			
 	a		 	a		 	a		 
sQwAGG	%		A 	
		#Qw''aq9AHr"   c           
         Uu  pEpgUu  pn
U R                   " / U R                  S U QXX-  PXi-  PXz-  PUPU	PU
PU R                  US-   S  Q76 n U R                  " / [        [	        U R                  S U 5      5      QUPUS-   PUS-   PUS-   PUS-   PUS-   P[        US-   [	        U R                  5      5      Q76 n U R                  X3S-   5      R                  US-   US-   5      R                  US-   US-   5      n U $ )Nr   r   r%   r5   r6   r7   r8   r<   s              r    r2   r2   ^   si   */'J&JBB			 			
#				 			 				
 			 			 			 
q			A 	
		 			s1774C=!	"				 	a		 	a			
 	a		 	a		 	a		 
sQwAGG	%		A 	
		#Qw''aq9AA#'3QR7SAHr"   qkstathrc           	         [         (       a  SSKJn  O[        S5      eU R	                  SS5      R                  5       n UR	                  SS5      R                  5       nU R                  u  pVpxUS-  n	U R                  XVU	SU5      R                  S5      n
UR                  XVU	SU5      R                  S5      R	                  SS5      nX-  n[        R                  " U[        R                  " U5      -  SS	9nUR                  S5      u  pUR                  S5      nUSU-
  :  R                  5       nUR!                  SUR#                  S5      5      n[        R$                  " UU5      nUR'                  S5      R)                  [        R*                  5      nUR#                  SS
S9R)                  [        R*                  5      nUR-                  [        R.                  " U5      UUUSS S9$ )Nr   )	BlockMaskz=Nabla attention is not supported with this version of PyTorchr%   r   @   r0   r&   T)r   
descending)
BLOCK_SIZEmask_mod)r   !torch.nn.attention.flex_attentionrK   
ValueError	transpose
contiguousr+   r1   meanr   softmaxr   sqrtsortcumsum_intgatherargsort
logical_orsumtoint32from_kv_blocks
zeros_like)rF   rG   rH   rI   rK   BhSDs1qakamapvalsindscvalsmaskkv_nbkv_indss                      r    	nablaT_v2rq   y   s    ?XYY	Aq$$&A	Aq$$&A JA!	
bB	
1R	#	(	(	,B	
1R	#	(	(	,	6	6r2	>B
'C
--diil*
3C"JDLLEQW!!#D;;r4<<+,DD#&D HHRLOOEKK(Ellrdl366u{{CG##E$4$4U$;WeWacnr#ssr"   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Kandinsky5TimeEmbeddings   c                 <  > [         TU ]  5         US-  S:X  d   eXl        X0l        [	        U R                  S-  U R                  5      U l        [        R                  " XSS9U l        [        R                  " 5       U l
        [        R                  " X"SS9U l        g )Nr   r   Tbias)super__init__	model_dimr   r!   r   nnLinearin_layerSiLU
activation	out_layer)selfrz   time_dimr   	__class__s       r    ry   !Kandinsky5TimeEmbeddings.__init__   sv    1}!!!"$t~~2DOOD
		)DA'')8DAr"   c                    [         R                  " UR                  [         R                  5      U R                  R                  UR
                  S95      n[         R                  " [         R                  " U5      [         R                  " U5      /SS9nU R                  U R                  U R                  U5      5      5      nU$ )N)devicer0   r&   )r   outerr_   r   r   r   catcossinr   r   r}   )r   timeargs
time_embeds       r    forward Kandinsky5TimeEmbeddings.forward   s}    {{4775==14::===3TUYY		$4ArJ
^^DOODMM*4M$NO
r"   )r   r   r}   r   rz   r        @__name__
__module____qualname____firstlineno__ry   r   __static_attributes____classcell__r   s   @r    rs   rs      s    B r"   rs   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Kandinsky5TextEmbeddings   c                    > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " USS9U l        g )NTrv   elementwise_affine)rx   ry   r{   r|   r}   	LayerNormnorm)r   text_dimrz   r   s      r    ry   !Kandinsky5TextEmbeddings.__init__   s2    		(DALLtD	r"   c                 d    U R                  U5      nU R                  U5      R                  U5      $ N)r}   r   type_as)r   
text_embeds     r    r    Kandinsky5TextEmbeddings.forward   s*    ]]:.
yy$,,Z88r"   )r}   r   r   r   s   @r    r   r      s    E
9 9r"   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Kandinsky5VisualEmbeddings   c                    > [         TU ]  5         X0l        [        R                  " [
        R                  " U5      U-  U5      U l        g r   )rx   ry   
patch_sizer{   r|   r   prodr}   )r   
visual_dimrz   r   r   s       r    ry   #Kandinsky5VisualEmbeddings.__init__   s3    $		$))J"7*"DiPr"   c           
      h   UR                   u  p#pEnUR                  UX0R                  S   -  U R                  S   X@R                  S   -  U R                  S   XPR                  S   -  U R                  S   U5      R                  SSSSSSSS5      R	                  SS5      nU R                  U5      $ )	Nr   r%   r   r   r6   r5   r7      )r+   viewr   r9   r(   r}   )r   r)   r>   r?   r@   rA   r   s          r    r   "Kandinsky5VisualEmbeddings.forward   s    34770
fSFFOOA.."//!,,"++"	 WQ1aAq!,WQ] 	
 }}Qr"   )r}   r   r   r   s   @r    r   r      s    Q
   r"   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Kandinsky5RoPE1D   c                    > [         TU ]  5         X0l        Xl        X l        [        US-  U5      n[        R                  " X$R                  S9nU R                  S[        R                  " XT5      SS9  g )Nr   r   r   F
persistent)rx   ry   r   r   max_posr!   r   r   r   register_bufferr   )r   r   r   r   freqposr   s         r    ry   Kandinsky5RoPE1D.__init__   s]    $:.ll7**5VU[[%;Nr"   c                    U R                   U   n[        R                  " U5      n[        R                  " U5      n[        R                  " X4* XC/SS9nUR
                  " / UR                  S S QSPSP76 nUR                  S5      $ )Nr0   r&   r   )r   r   r   r   stackr   r+   	unsqueeze)r   r   r   cosinesiner*   s         r    r   Kandinsky5RoPE1D.forward   st    yy~4yy{{FE48bAyy0$**Sb/010a0~~b!!r"   )r   r   r   )i   r   r   r   s   @r    r   r      s    O" "r"   r   c                   6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )Kandinsky5RoPE3D   c                 0  > [         T	U ]  5         Xl        X l        X0l        [        [        X5      5       H]  u  nu  pV[        US-  U5      n[        R                  " XgR                  S9nU R                  SU 3[        R                  " X5      SS9  M_     g )Nr   r   args_Fr   )rx   ry   	axes_dimsr   r   	enumeratezipr!   r   r   r   r   r   )
r   r   r   r   iaxes_dim
ax_max_posr   r   r   s
            r    ry   Kandinsky5RoPE3D.__init__   s    "$)23y3J)K%A%X]J7D,,z<C  5ekk#.DQV W *Lr"   c                    Uu  pEpgU R                   US      US   -  nU R                  US      US   -  n	U R                  US      US   -  n
[        R                  " UR                  SUSSS5      R                  USXgS5      U	R                  SSUSS5      R                  XESUS5      U
R                  SSSUS5      R                  XEUSS5      /SS9n[        R                  " U5      n[        R                  " U5      n[        R                  " X* X/SS9nUR
                  " / UR                  S S QSPSP76 nUR                  S5      $ )Nr   r%   r   r0   r&   r   )args_0args_1args_2r   r   r   repeatr   r   r   r+   r   )r   r+   r   scale_factorr>   r?   r@   rA   args_targs_hargs_wr   r   r   r*   s                  r    r   Kandinsky5RoPE3D.forward   sP   .3+
fSV$|A6SV$|A6SV$|A6yyAxAr299*aXYZAq&!R077
aQVXYZAq!UB/66zVUVXYZ
 
 4yy{{FE48bAyy0$**Sb/010a0~~b!!r"   )r   r   r   ))   r   r   r   )      ?r   r   r   r   s   @r    r   r      s    	X" "r"   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Kandinsky5Modulationi  c                 J  > [         TU ]  5         [        R                  " 5       U l        [        R
                  " XU-  5      U l        U R                  R                  R                  R                  5         U R                  R                  R                  R                  5         g r   )rx   ry   r{   r~   r   r|   r   weightdatazero_rw   )r   r   rz   
num_paramsr   s       r    ry   Kandinsky5Modulation.__init__  sh    '')8)-CD""((*  &&(r"   c                 B    U R                  U R                  U5      5      $ r   )r   r   r   r)   s     r    r   Kandinsky5Modulation.forward  s    ~~dooa011r"   )r   r   r   r   s   @r    r   r     s    )2 2r"   r   c                   ,    \ rS rSrSrSrS rSS jrSrg)Kandinsky5AttnProcessori  Nc                 r    [        [        S5      (       d"  [        U R                  R                   S35      eg )Nscaled_dot_product_attentionz; requires PyTorch 2.0. Please upgrade your pytorch version.)hasattrFImportErrorr   r   )r   s    r    ry    Kandinsky5AttnProcessor.__init__  s3    q899!8!8 99tuvv :r"   c           	      j   UR                  U5      nUb  UR                  U5      nUR                  U5      nUR                  S S UR                  S S pUR                  " / U	QUR
                  PSP76 nUR                  " / U
QUR
                  PSP76 nUR                  " / U
QUR
                  PSP76 nOUR                  U5      nUR                  U5      nUR                  S S n	UR                  " / U	QUR
                  PSP76 nUR                  " / U	QUR
                  PSP76 nUR                  " / U	QUR
                  PSP76 nUR                  UR                  5       5      R                  U5      nUR                  UR                  5       5      R                  U5      nS nUb.  U" Xd5      R                  U5      nU" Xt5      R                  U5      nUb  [        UUUS   US   S9nOS n[        UUUUU R                  U R                  S9nUR                  SS5      nUR                  U5      nU$ )Nr0   c                    U R                   " / U R                  S S QSPSPSP76 R                  [        R                  5      nX-  R                  SS9nUR                   " U R                  6 R                  [        R                  5      $ )Nr0   r%   r   r&   )r1   r+   r_   r   r   r^   bfloat16)r)   r*   x_x_outs       r    apply_rotary6Kandinsky5AttnProcessor.__call__.<locals>.apply_rotary5  so    3AGGCRL3"3a3366u}}EBYOOO+E==!''*--enn==r"   sta_maskP)rI   )	attn_maskbackendparallel_configrM   )to_queryto_keyto_valuer+   r1   	num_heads
query_normfloatr   key_normrq   r   _attention_backend_parallel_configr(   r   )r   attnhidden_statesencoder_hidden_states
rotary_embsparse_paramsquerykeyvaluer+   
cond_shaper   r   attn_outs                 r    __call__ Kandinsky5AttnProcessor.__call__  s+   m, ,++34CMM"78E %CR 0#))CR.:MM=5=$..="=E++>z>4>>>2>CMMB:Bt~~BrBE ++m,CMM-0EKK$EMM=5=$..="=E++9u9dnn9b9CMM=5=$..="=E .66u=mmCIIK(005	>
 ! 3;;EBEs/77<C$!j)!#&	I I-++ 11
 &--b"5>>-0r"    NNN)	r   r   r   r   r  r  ry   r  r   r  r"   r    r   r     s    w:r"   r   c                      ^  \ rS rSr\r\/rSU 4S jjr   SS\R                  S\R                  S-  S\R                  S-  S\
\R                  \R                  4   S-  S\R                  4
S	 jjrS
rU =r$ )Kandinsky5AttentioniX  Nc                   > [         TU ]  5         X-  S:X  d   eX-  U l        [        R                  " XSS9U l        [        R                  " XSS9U l        [        R                  " XSS9U l        [        R                  " U5      U l	        [        R                  " U5      U l
        [        R                  " XSS9U l        Uc  U R                  5       nU R                  U5        g )Nr   Trv   )rx   ry   r  r{   r|   r   r  r  RMSNormr  r  r   _default_processor_clsset_processor)r   num_channelshead_dim	processorr   s       r    ry   Kandinsky5Attention.__init__^  s    &!+++%1		,4HiiF		,4H**X.

8,<DI335I9%r"   r
  r  r  r  returnc                 0   [        [        R                  " U R                  R                  5      R
                  R                  5       5      n0 nUR                  5        VV	s/ s H  u  pX;  d  M  X;  d  M  UPM     n
nn	[        U
5      S:  a:  [        R                  SU
 SU R                  R                  R                   S35        UR                  5        VVs0 s H  u  pX;   d  M  X_M     nnnU R                  " U U4UUUS.UD6$ s  sn	nf s  snnf )Nr   zattention_processor_kwargs z are not expected by z and will be ignored.)r  r  r  )setinspect	signaturer  r  
parameterskeysitemsr;   loggerwarningr   r   )r   r
  r  r  r  kwargsattn_parametersquiet_attn_parametersrG   _unused_kwargsws               r    r   Kandinsky5Attention.forwardn  s    g//0G0GHSSXXZ[ "'-||~u~tq9QVWVt~u}!NN-m_<QRVR`R`RjRjRsRsQt  uJ  K $*<<>J>41Q5I$!$>J~~
 #8'!
 
 	
 v
 Ks   !D0D7DD+D)r  r  r   r  r  r   r  r   r  )r   r   r   r   r   r  _available_processorsry   r   r   tupler   r   r   r   s   @r    r  r  X  s    4&& 6:-1?C
||
  %||d2
 ||d*	

 %,,45<
 

 
r"   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Kandinsky5FeedForwardi  c                    > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " 5       U l        [        R                  " X!SS9U l        g )NFrv   )rx   ry   r{   r|   r}   GELUr   r   )r   r   ff_dimr   s      r    ry   Kandinsky5FeedForward.__init__  s?    		#E:'')6U;r"   c                 `    U R                  U R                  U R                  U5      5      5      $ r   )r   r   r}   r   s     r    r   Kandinsky5FeedForward.forward  s#    ~~doodmmA.>?@@r"   )r   r}   r   r   r   s   @r    r5  r5    s    <A Ar"   r5  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Kandinsky5OutLayeri  c                    > [         TU ]  5         X@l        [        X!S5      U l        [
        R                  " USS9U l        [
        R                  " U[        R                  " U5      U-  SS9U l        g )Nr   Fr   Trv   )rx   ry   r   r   
modulationr{   r   r   r|   r   r   r   )r   rz   r   r   r   r   s        r    ry   Kandinsky5OutLayer.__init__  sV    $.xAFLLuE	9dii
.Cj.PW[\r"   c                    [         R                  " U R                  U5      R                  SS9SSS9u  pEU R	                  UR                  5       5      UR                  5       S S 2S S 4   S-   -  UR                  5       S S 2S S 4   -   R                  U5      nU R                  U5      nUR                  u  pxpnUR                  UUU	U
SU R                  S   U R                  S   U R                  S   5      R                  SSSSSS	S
S5      R                  SS5      R                  SS	5      R                  S	S5      nU$ )Nr%   r&   r   r0   r   r   r6   r7   r   r   r5   )r   chunkr?  r   r   r  r   r   r+   r   r   r9   r(   )r   visual_embedr   r   shiftscaler)   r>   r?   r@   rA   r.  s               r    r   Kandinsky5OutLayer.forward  s4   {{4??:#>#H#HQ#H#OQRXZ[ IIl((*+u{{}Qd]/Kc/QRUZU`U`UbcdfjlpcpUqq
',
 	 NN<(12.
fQFF"""	 WQ1aAq!,WQ]WQ]WQ] 	
  r"   )r?  r   r   r   r   r   s   @r    r=  r=    s    ] r"   r=  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )!Kandinsky5TransformerEncoderBlocki  c                    > [         TU ]  5         [        X!S5      U l        [        R
                  " USS9U l        [        X[        5       S9U l	        [        R
                  " USS9U l
        [        X5      U l        g )Nr7   Fr   r  )rx   ry   r   text_modulationr{   r   self_attention_normr  r   self_attentionfeed_forward_normr5  feed_forwardr   rz   r   r8  r  r   s        r    ry   *Kandinsky5TransformerEncoderBlock.__init__  sc    3HK#%<<	e#T 1)QhQjk!#iE!R1)Dr"   c                 b   [         R                  " U R                  U5      R                  SS9SSS9u  pE[         R                  " USSS9u  pgnU R	                  UR                  5       5      UR                  5       S-   -  UR                  5       -   R                  U5      n	U R                  XS9n	UR                  5       UR                  5       U	R                  5       -  -   R                  U5      n[         R                  " USSS9u  pgnU R                  UR                  5       5      UR                  5       S-   -  UR                  5       -   R                  U5      n	U R                  U	5      n	UR                  5       UR                  5       U	R                  5       -  -   R                  U5      nU$ )Nr%   r&   r   r0   r   r   )r  )
r   rB  rK  r   rL  r  r   rM  rN  rO  )
r   r)   r   r*   self_attn_params	ff_paramsrD  rE  gateouts
             r    r   )Kandinsky5TransformerEncoderBlock.forward  s]   &+kk$2F2Fz2R2\2\ab2\2cefln&o#"[[)91"Ed''	2ekkmc6IJU[[]Zccdef!!#!7WWY		33<<Q?"[[A2>d%%aggi0EKKMC4GH5;;=Xaabcd$WWY		33<<Q?r"   )rO  rN  rM  rL  rK  r   r   s   @r    rH  rH    s    E r"   rH  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )!Kandinsky5TransformerDecoderBlocki  c                 \  > [         TU ]  5         [        X!S5      U l        [        R
                  " USS9U l        [        X[        5       S9U l	        [        R
                  " USS9U l
        [        X[        5       S9U l        [        R
                  " USS9U l        [        X5      U l        g )N	   Fr   rJ  )rx   ry   r   visual_modulationr{   r   rL  r  r   rM  cross_attention_normcross_attentionrN  r5  rO  rP  s        r    ry   *Kandinsky5TransformerDecoderBlock.__init__  s    !5h1!M#%<<	e#T 1)QhQjk$&LLu$U!29RiRkl!#iE!R1)Dr"   c                    [         R                  " U R                  U5      R                  SS9SSS9u  pgn[         R                  " USSS9u  pnU R	                  UR                  5       5      U
R                  5       S-   -  U	R                  5       -   R                  U5      nU R                  XUS9nUR                  5       UR                  5       UR                  5       -  -   R                  U5      n[         R                  " USSS9u  pnU R                  UR                  5       5      U
R                  5       S-   -  U	R                  5       -   R                  U5      nU R                  XS9nUR                  5       UR                  5       UR                  5       -  -   R                  U5      n[         R                  " USSS9u  pnU R                  UR                  5       5      U
R                  5       S-   -  U	R                  5       -   R                  U5      nU R                  U5      nUR                  5       UR                  5       UR                  5       -  -   R                  U5      nU$ )Nr%   r&   r   r0   r   )r  r  )r  )r   rB  r\  r   rL  r  r   rM  r]  r^  rN  rO  )r   rC  r   r   r*   r  rS  cross_attn_paramsrT  rD  rE  rU  
visual_outs                r    r   )Kandinsky5TransformerDecoderBlock.forward  s2   9>"":.88Q8?:
6Y #[[)91"Ed..|/A/A/CDX[H[\_d_j_j_lluu

 ((Ta(b
$**,tzz|j>N>N>P/PPYYZfg"[[):A2Fd//0B0B0DEY\I\]`e`k`k`mmvv

 ))*)W
$**,tzz|j>N>N>P/PPYYZfg"[[A2>d,,\-?-?-ABekkmVYFYZ]b]h]h]jjss

 &&z2
$**,tzz|j>N>N>P/PPYYZfgr"   )r^  r]  rO  rN  rM  rL  r\  r   r   s   @r    rY  rY    s    E r"   rY  c                     ^  \ rS rSrSrSS/r/ SQrSr\                       S S\	S	\
S
\
S\
S\S\S\S\S\S\
S\	4U 4S jjj5       r   S!S\R                  S\R                  S\R                  S\R                  S\\\\4   S\R"                  S\\\\4   S\\	\4   S-  S\
S\\R*                  -  4S jjrSrU =r$ )"Kandinsky5Transformer3DModeli  z7
A 3D Diffusion Transformer model for video-like data.
rH  rY  )time_embeddingsr?  r\  rK  TNattention_typeattention_causalattention_localattention_globattention_windowattention_Pattention_wTattention_wWattention_wHattention_add_staattention_methodc                   > [         TU ]  5         [        U5      nXl        Xpl        X`l        Xl        Xl        U(       a  SU-  S-   OUn[        Xt5      U l	        [        X'5      U l        [        X45      U l        [        UXv5      U l        [        U5      U l        [#        U5      U l        [&        R(                  " [+        U	5       Vs/ s H  n[-        XtUU5      PM     sn5      U l        [&        R(                  " [+        U
5       Vs/ s H  n[1        XtUU5      PM     sn5      U l        [5        XtXV5      U l        SU l        g s  snf s  snf )Nr   r%   F)rx   ry   r^   in_visual_dimrz   r   visual_condrg  rs   rf  r   text_embeddingspooled_text_embeddingsr   visual_embeddingsr   text_rope_embeddingsr   visual_rope_embeddingsr{   
ModuleListr:   rH  text_transformer_blocksrY  visual_transformer_blocksr=  r   gradient_checkpointing)r   rs  in_text_dimin_text_dim2r   out_visual_dimr   rz   r8  num_text_blocksnum_visual_blocksr   rt  rg  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  r  visual_embed_dimr.  r   s                              r    ry   %Kandinsky5Transformer3DModel.__init__  s8   6 	y>*"$&,4?1},q0]  8	L7O&>|&V#!;<Li!d %5X$>!&6y&A# (*}}_det_uv_uZ[.yFHU_uv(
$ *, 011A 2)vxX1*
& ,I\&+# ws   EE	r
  r  timesteppooled_projectionsvisual_rope_postext_rope_posr   r  return_dictr!  c
           	      *   Un
UnUnUnU R                  U5      nU R                  U5      nXR                  U5      -   nU R                  U
5      nU R	                  U5      nUR                  SS9nU R                   HL  n[        R                  " 5       (       a&  U R                  (       a  U R                  UXU5      nMC  U" XU5      nMN     UR                  SS nU R                  UXW5      nUb  US   OSn[        UUUUS9u  nnU R                   HQ  n[        R                  " 5       (       a)  U R                  (       a  U R                  UUUUUU5      nMF  U" XUUU5      nMS     [        UUUS9nU R!                  XU5      n
U	(       d  U
$ [#        U
S9$ )	a   
Forward pass of the Kandinsky5 3D Transformer.

Args:
    hidden_states (`torch.FloatTensor`): Input visual states
    encoder_hidden_states (`torch.FloatTensor`): Text embeddings
    timestep (`torch.Tensor` or `float` or `int`): Current timestep
    pooled_projections (`torch.FloatTensor`): Pooled text embeddings
    visual_rope_pos (`tuple[int, int, int]`): Position for visual RoPE
    text_rope_pos (`torch.LongTensor`): Position for text RoPE
    scale_factor (`tuple[float, float, float]`, optional): Scale factor for RoPE
    sparse_params (`dict[str, Any]`, optional): Parameters for sparse attention
    return_dict (`bool`, optional): Whether to return a dictionary

Returns:
    [`~models.transformer_2d.Transformer2DModelOutput`] or `torch.FloatTensor`: The output of the transformer
r   r&   Nr0   
to_fractalF)r,   )sample)ru  rf  rv  rw  rx  r   r{  r   is_grad_enabledr}  _gradient_checkpointing_funcr+   ry  r.   r|  r3   r   r   )r   r
  r  r  r  r  r  r   r  r  r)   r   r   pooled_text_embedr   rC  	text_ropetext_transformer_blockvisual_shapevisual_roper  visual_transformer_blocks                         r    r   $Kandinsky5Transformer3DModel.forwardQ  s   : *
.))*5
))$/
"="=>O"PP
--a0--m<	''A'.	&*&B&B"$$&&4+F+F!>>*JI
 4JIV
 'C $))#2.11,^4A4M]<0SX
$3L+|hr$s!k(,(F(F$$$&&4+F+F#@@, !   8 j+}  )G )|PZ[NN<Z@H'q11r"   )rg  r}  rs  rz   r   r   rv  ru  rx  r{  rf  rt  rw  ry  r|  )r5   i   i   i   r5   )r%   r   r   i   i   r       )      r  FregularNNNNNNNNNN)r   NT)r   r   r   r   __doc___repeated_blocks_keep_in_fp32_modules _supports_gradient_checkpointingr   strboolrZ   r  ry   r   r   r3  
LongTensordictr   r   FloatTensorr   r   r   r   s   @r    re  re    s   
 	,+ f'+$ '!% $# $!   "& $1=, =, =,  !=," #=,$ %=,& '=,( )=,* +=,, -=,.  /=,0 1=, =,N 4C/3 K2||K2  %||K2 ,,	K2
 "LLK2 sC}-K2 ''K2 E5%/0K2 CH~,K2 K2 
"E$5$5	5K2 K2r"   re  r   )F)r   )g?)7r$  r   typingr   r   torch.nnr{   torch.nn.functional
functionalr   r   configuration_utilsr   r   loadersr   r	   utilsr
   	attentionr   r   attention_dispatchr   r   cache_utilsr   modeling_outputsr   modeling_utilsr   
get_loggerr   r)  r!   r.   r3   r'   r2   r  rq   Modulers   r   r   r   r   r   r   r  r5  r=  rH  rY  re  r  r"   r    <module>r     s~           B ? = J $ 7 ' 
		H	%

6> 	!t!t!t 
!t 
	!tHryy $9ryy 9   2"ryy "&"ryy "B	2299 	2B BJ.
"))%9 .
bABII A" "J		 4(		 (V^2^2r"   