
    
3j                    
   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SKrS SK	J
r  S SKJr  S SKJrJrJr  S SKrS SKrS SKr S SKJr  Sr/ S	QrSS
 jrS SKJr   " S S5      rS\R@                  4S jr! " S S5      r" " S S5      r# " S S5      r$SS jr% " S S5      r& " S S5      r' " S S5      r( " S S5      r) " S S5      r*Sq+S  r,S! r-SS" jr.S SK/r0S#r1\0Rd                  " 5       r3S$ r4SS% jr5 " S& S'5      r6 " S( S)5      r7 " S* S+5      r8 " S, S-5      r9 " S. S/5      r:S S0K;J<r<  0 4S1 jr=S S2K>J?r?  SS3 jr@S4 rAS SKBrB " S5 S65      rC " S7 S85      rD S S9KEJFrF   S S:KEJGrHJIrJ  SSSS0 4S; jrK " S< S=5      rL " S> S?5      rM0 4S@ jrN0 4SA jrOSB rP0 4SC jrQ0 4SD jrR " SE SF5      rS " SG SH5      rT " SI SJ5      rU " SK SL5      rV " SM SN\R                  5      rX S SOKYJZrZJ[r[   " SP SQ5      r\ " SR SS5      r]SST jr^SSU jr_S SVK`Jara   " SW SX5      rb " SY SZ5      rc S S[KdJere   " S\ S]5      rf " S^ S_5      rg " S` Sa5      rh " Sb Sc5      riSd rj " Se Sf5      rk " Sg Sh\R                  5      rlS SiKmJnro  S SjKJprq  Sk rr " Sl Sm5      rsSn rt0 4So jru " Sp Sq5      rv " Sr Ss\R                  5      rwS StKxJyry  S SuKzJ{r{  SSv jr| " Sw Sx\R                  5      r}\R                  " / SyQ/ SzQ/ S{Q/ S|Q/ S}Q/ S~Q/ SQ/ SQ/ SQ/ SQ/
\R                  S9r " S S5      rg! \ a    Sr\R4                  " S5         GNf = f! \ a    SrF GNf = f! \ a    SrHSrJ GNf = f! \ a    SrZSr[ GNf = f! \ a    Sre GNRf = f)    N)tqdm)args)	wrap_attnoptimized_attentionattention_pytorch)ioTFzSComfyUI v3 node API not available, please update ComfyUI to access latest v3 nodes.)disabledautosageattn_qk_int8_pv_fp16_cudasageattn_qk_int8_pv_fp16_tritonsageattn_qk_int8_pv_fp8_cudasageattn_qk_int8_pv_fp8_cuda++	sageattn3sageattn3_per_block_meanc                   ^ ^^^^^^ [         R                  " ST  35        T S:X  a  SSKJm  SU4S jjmOiT S:X  a  SSKJm  SU4S jjmOTT S	:X  a  SS
KJm  SU4S jjmO?T S:X  a  SSKJm  SU4S jjmO*T S:X  a  SSKJm  SU4S jjmOST ;   a  SSKJm  SU U4S jjmU(       d$  [        R                  R                  5       " T5      m[        SU4S jj5       nU$ )NzUsing sage attention mode: r
   r   )sageattnc           	         > T" XX#XES9$ N)	is_causal	attn_masktensor_layout )qkvr   r   r   r   s         [/home/wildlama/comfy/ComfyUI/custom_nodes/ComfyUI-KJNodes/nodes/model_optimization_nodes.py	sage_func get_sage_func.<locals>.sage_func   s    A!Ikk    r   )r   c           
         > T" XX#USUS9$ )Nfp32r   r   pv_accum_dtyper   r   )r   r   r   r   r   r   r   s         r   r   r   #   s#    0qYbsy  JW  X  Xr   r   )r   c           	         > T" XX#XES9$ r   r   )r   r   r   r   r   r   r   s         r   r   r   '   s    21[d  C  Cr   r   )r   c           
         > T" XX#USUS9$ )Nz	fp32+fp32r"   r   r   r   r   r   r   r   r   s         r   r   r   +   #    /aXar}  N[  \  \r   r   c           
         > T" XX#USUS9$ )Nz	fp32+fp16r"   r   r&   s         r   r   r   /   r'   r   r   )sageattn3_blackwellc           	         > XU4 Vs/ s H  ouS:X  a  UR                  SS5      OUPM     snu  pnT
" XX#UT	S:H  S9nUS:X  a  UR                  SS5      $ U$ s  snf )NNHD      r   )r   r   per_block_mean	transpose)r   r   r   r   r   r   kwargsxoutsage_attentionr)   s            r   r   r   3   s}    TUZ[S\]S\aU,Bq{{1a(IS\]GA!%aAiiw  |V  jV  XC*75*@3==A&IcI ^s   #Ac           	        >^^^ UR                  SS5      SL a  [        XUT4XFUS.UD6$ UR                  n	U R                  [        R                  :X  d<  UR                  [        R                  :X  d  UR                  [        R                  :X  a\  U R                  [        R                  5      UR                  [        R                  5      UR                  [        R                  5      p!n U(       a  U R                  u  m  n
mSnO.U R                  u  mn
mTT-  m[        UUU4S jXU45      u  pnSnUbB  UR                  S:X  a  UR                  S	5      nUR                  S
:X  a  UR                  S5      nT" XX$SUS9R                  U	5      nUS:X  a/  U(       d&  UR                  SS5      R                  TSTT-  5      nU$ U(       a  UR                  SS5      nU$ UR                  TSTT-  5      nU$ )Nlow_precision_attentionTF)maskskip_reshapeskip_output_reshapeHNDc                 ,   > U R                  TSTT5      $ Nviewtbdim_headheadss    r   <lambda>7get_sage_func.<locals>.attention_sage.<locals>.<lambda>I   s    !&&Bx8r   r+   r-   r      r,   )r   r   r   r=   )getr   dtypetorchfloat32tofloat16shapemapndim	unsqueezer0   reshape)r   r   r   rD   r7   attn_precisionr8   r9   r1   in_dtype_r   r3   rB   rC   r   s      `         @@r   attention_sage%get_sage_func.<locals>.attention_sage;   s   ::/6%?$Q1e  N$  pC  N  GM  N  N7777emm#qww%--'?177emmC[dd5==)144+>U]]@S!A !Aq!XMWWNAq(H8q	GA!  MyyA~~~a(yyA~~~a(a5P]^aabjkE!&MM!Q'//2ux7GH  
	 #mmAq) 
 kk!R)9:
r   )FNr+   NNFF)logginginfosageattentionr   r   r   r   r   r)   rJ   compilerdisabler   )	r4   allow_compilerV   r   r   r)   r   r   r   s	   `  @@@@@@r   get_sage_funcr_      s    LL.~.>?@*	l 	l	:	:?	X 	X	<	<A	C 	C	9	9>	\ 	\	;	;>	\ 	\		&1	J 	J
 NN**,Y7	# #H r   )CallbacksMPc                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SS jrS	rg
)PathchSageAttentionKJd   c                 2    S[         SSS.4S.SSSSS.40S	.$ )
NMODELFzPatch the attention of the model passing through this node to use sageattn. To revert, run this node again with the disabled option. Requires the sageattention library to be installed.defaulttooltip)modelr4   r^   BOOLEANziAllow the use of torch.compile for the sage attention function, requires latest sageattn 2.2.0 or higher.requiredoptional)sageattn_modesss    r   INPUT_TYPES!PathchSageAttentionKJ.INPUT_TYPESe   sO      -5  NH  0I  J

 iU  Hs  *t  u	
 	
r   re   patchzExperimental node for patching attention mode. This doesn't use the model patching system and thus can't be disabled without running the node again with 'disabled' option.TKJNodes/experimentalc                 |   ^ US:X  a  U4$ UR                  5       n[        X#S9mU4S jnXTR                  S   S'   U4$ )Nr	   )r^   c                 (   > TR                   " U0 UD6$ N__wrapped__funcr   r1   new_attentions      r   attention_override_sage<PathchSageAttentionKJ.patch.<locals>.attention_override_sage}        ,,d=f==r   transformer_optionsoptimized_attention_override)cloner_   model_options)selfrj   r4   r^   model_cloner~   r}   s         @r   rt   PathchSageAttentionKJ.patchv   sM    Z'6Mkkm%nR	> \s!!"789WX|r   r   NF__name__
__module____qualname____firstlineno__classmethodrr   RETURN_TYPESFUNCTIONDESCRIPTIONEXPERIMENTALCATEGORYrt   __static_attributes__r   r   r   rb   rb   d   s7    
 
 LH @KL%Hr   rb   c           	        ^^^^ Sm SSK Jm  [        R
                  " ST(       a  SOS S	T 35        UU4S
 jmU (       d$  [        R                  R                  5       " T5      m[        R                  R                  5       (       a"  [        R                  " SSSSTSS9nT" X"U5        [        SUU4S jj5       nU$ ! [         a%     SSKJm  Sm N! [         a    [        S5      ef = ff = f)NFr   )flash_attn_funcTzyFlash attention not found. Install either FA2 ('flash_attn') or FA3 ('flash_attn_interface', pip package 'flash-attn-3').zUsing flash attention 32z: cast_dtype=c                 n   > T(       a	  T" XUSS9nO	T" XUSSS9n[        U[        5      (       a  US   $ U$ )NF)causal        )	dropout_pr   r   )
isinstancetuple)r   r   r   r3   r   is_fa3s       r   
flash_func"get_flash_func.<locals>.flash_func   s?    !!%8C!!SGC#C//s1v8S8r   r,      r-   @   cudarI   devicec                   >^^^ Ub  [        S5      eUR                  n	U R                  [        R                  :X  d<  UR                  [        R                  :X  d  UR                  [        R                  :X  a2  U R	                  T5      UR	                  T5      UR	                  T5      p!n U(       a$  U R
                  u  m  n
m[        S XU45      u  pnO,U R
                  u  mn
mTT-  m[        UUU4S jXU45      u  pnT" XU5      R	                  U	5      nU(       a  UR                  SS5      nU$ UR                  TSTT-  5      nU$ )Nz0Flash attention does not support attention masksc                 &    U R                  SS5      $ )Nr,   r-   r/   )rA   s    r   rE   9get_flash_func.<locals>.attention_flash.<locals>.<lambda>   s    AKK1$5r   c                 ,   > U R                  TSTT5      $ r<   r>   r@   s    r   rE   r      s    AFF1b%$Br   r,   r-   r=   )	RuntimeErrorrI   rJ   rK   rL   rN   rO   r0   rR   )r   r   r   rD   r7   rS   r8   r9   r1   rT   rU   r3   rB   rC   
cast_dtyper   s      `        @@r   attention_flash'get_flash_func.<locals>.attention_flash   s   QRR7777emm#qww%--'?177emmC[dd:&Z(8!$$z:J!A !Aq!X5ayAGA!WWNAq(HBQ1INGA!q!$$X.--1%C 
 ++aUX%56C
r   rX   )
flash_attnr   ImportErrorflash_attn_interfacerY   rZ   rJ   r\   r]   r   is_availablezerosr   )r^   r   prober   r   r   r   s    `  @@@r   get_flash_funcr      s     F
. LL)#S)Azl[\9 ^^++-j9
zz  Aq!Rz&I5' , e  	<F 	L 		s   B> >
C-	CC))C-c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SS jrS	rg
)PatchFlashAttentionKJ   c                     SS0SSSSS.40S.$ )	Nrj   re   r^   rk   FzAllow torch.compile to trace into the flash attention function. If disabled (default), the function is wrapped with torch.compiler.disable() for compatibility, matching the sage attention node.rg   rl   r   rp   s    r   rr   !PatchFlashAttentionKJ.INPUT_TYPES   s9     Z
 iU  HK  *L  M
 	r   re   rt   a  Experimental node for patching attention to use flash attention, without the silent SDPA fallback the ComfyUI default does. Patches the attention of the model passing through this node; to disable, bypass or disconnect this node. Requires the flash_attn library to be installed.Tru   c                 b  ^ [        UR                  S5      (       a  UR                  R                  5       O[        R                  nU[        R                  [        R
                  4;   a  UO[        R                  n[        UUS9mUR                  5       nU4S jnXeR                  S   S'   U4$ )Nget_dtype_inference)r^   r   c                 (   > TR                   " U0 UD6$ rx   ry   r{   s      r   attention_override_flash=PatchFlashAttentionKJ.patch.<locals>.attention_override_flash   r   r   r   r   )	hasattrrj   r   rJ   rM   bfloat16r   r   r   )r   rj   r^   inference_dtyper   r   r   r}   s          @r   rt   PatchFlashAttentionKJ.patch   s    ?Fu{{Ti?j?j%++99;pup}p}(7EMM5>>;Z(Z_`e`m`m
&'!

 kkm	> \t!!"789WX|r   r   Nr   r   r   r   r   r   r      s7      LH kKL%Hr   r   c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)CheckpointLoaderKJ   c           
          S[         R                  " S5      SS04/ SQ4/ SQSSS	.4S
SSS	.4[        SSS	.4S
SSS	.4S.0$ )Nrm   checkpointsri   +The name of the checkpoint (model) to load.rh   
fp8_e4m3fnfp8_e4m3fn_fastfp8_e5m2fp16bf16r!   rh   r   r   r!   rh   'The compute dtype to use for the model.rg   rk   F$Enable or disable the cublas_ops arg&Patch comfy attention to use sageattn.aEnable torch.backends.cuda.matmul.allow_fp16_accumulation, required minimum pytorch version 2.7.1)	ckpt_nameweight_dtypecompute_dtypepatch_cublaslinearr4   enable_fp16_accumulationfolder_pathsget_filename_listro   rp   s    r   rr   CheckpointLoaderKJ.INPUT_TYPES   s    &88G)  VC  JD  EmoAy  fO  DP  Q#,%Lr.s"t-5Mu/vw)2  Sv  5w  )x
  	r   )rf   CLIPVAEloadzAExperimental node for patching torch.nn.Linear with CublasLinear.TKJNodes/model_loadersc                    ^ [         R                  [         R                  [         R                  [         R                  [         R
                  S.n0 nUR                  U5      =n	(       a   XS'   [        R                  " SU SU	 35        US:X  a  [         R                  US'   SUS'   U(       a   [        R                  R                  S5        O[        R                  R                  S5        [        R                  " S	U5      n
[        R                   R#                  U
SS[        R$                  " S
5      US9u  ppUR                  U5      =n	(       a4  UR'                  U	5        SUl        [        R                  " SU SU	 35        U(       ah  [+        [         R,                  R.                  R0                  S5      (       a*  S[         R,                  R.                  R0                  l        Og[5        S5      e[+        [         R,                  R.                  R0                  S5      (       a)  S[         R,                  R.                  R0                  l        US:w  a"  [7        U5      mU4S jnXR8                  S   S'   XU4$ )Nr   r   r   r   r!   rI   Setting  weight dtype to r   Tfp8_optimizations
cublas_opsr   
embeddings)
output_vaeoutput_clipembedding_directoryr   F compute dtype to allow_fp16_accumulationIFailed to set fp16 accumulation, requires pytorch version 2.7.1 or higherr	   c                 (   > TR                   " U0 UD6$ rx   ry   r{   s      r   r~   8CheckpointLoaderKJ.load.<locals>.attention_override_sage%      $00$A&AAr   r   r   )rJ   float8_e4m3fnfloat8_e5m2rM   r   rK   rH   rY   rZ   r   fastadddiscardr   get_full_path_or_raisecomfysdload_checkpoint_guess_configget_folder_pathsset_model_compute_dtypeforce_cast_weightsr   backendsr   matmulr   r   r_   r   )r   r   r   r   r   r4   r   	DTYPE_MAPr   rI   	ckpt_pathrj   clipvaerU   r~   r}   s                   @r   r   CheckpointLoaderKJ.load   s   --))MMNNMM
	 MM,//5/%*'"LL8I;.?wGH,,%*%8%8M'"15M-.IIMM,'IIl+ 77yQ	#hhCC , = =l K' D )S MM-0050))%0',E$LL8I;.@HI#u~~**113LMMEI##**B"#noou~~**113LMMEJ##**BZ').9MB Zq 567UVCr   r   N)r   r   r   r   r   rr   r   r   r   r   r   r   r   r   r   r   r   r      s4      ,LHUKL&H4 r   r   c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS	 rS
rg)DiffusionModelSelectori.  c                     [         R                  " S5      nU Vs/ s H  nSUR                  5       ;   d  M  UPM     nnSS[         R                  " S5      U-   SS0400$ s  snf )Ntext_encoders	connectorrm   
model_namediffusion_modelsri   r   )r   r   lower)rq   ltx2_connector_modelsms      r   rr   "DiffusionModelSelector.INPUT_TYPES/  s     , > > O,A ^,Aq[TUT[T[T]E],A ^<99:LMPeehq  ta  hb  c
 
 	
 !_s
   A A STRING)
model_pathget_pathz*Returns the path to the model as a string.Tr   c                     SUR                  5       ;   a  [        R                  " SU5      nU4$ [        R                  " SU5      nU4$ )Nr	  r  r  )r  r   r   )r   r
  r  s      r   r  DiffusionModelSelector.get_path?  sM    ***,,%<<_jYJ } &<<=OQ[\J}r   r   N)r   r   r   r   r   rr   r   RETURN_NAMESr   r   r   r   r  r   r   r   r   r  r  .  s9    
 
 L"LH>KL&Hr   r  c                    Uc  0 O
[        U5      n[        R                  R                  U SS9u  pEUbz  [        R                  R                  U5      nUR	                  U5        A[        R
                  R                  R                  U5      n[        R                  R                  XGS0SS9n[        R
                  R                  UUUUS9n[        XU44Ul        U$ )NT)return_metadata Ffilter_keys)r   metadatadisable_dynamic)dictr   utilsload_torch_fileupdater   model_detectionunet_prefix_from_state_dictstate_dict_prefix_replaceload_diffusion_model_state_dict_load_diffusion_model_kjcached_patcher_init)		unet_pathr   extra_state_dictr  r   r  extra_sddiffusion_model_prefixrj   s	            r   r&  r&  F  s    '/BT-5HM;;..y$.OLB#;;../?@
		(!&!9!9!U!UVX!Y[[222PR7Saf2gHH44
#'	 5 E ":IVf;g hELr   c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SS	 jrS
rg)DiffusionModelLoaderKJi\  c           	          [         R                  " S5      SS04/ SQ4/ SQSSS.4S	S
SS.4[        S
SS.4S	S
SS.4S.SSSSS.40S.$ )Nr  ri   r   r   r   rh   r   rg   rk   Fr   r   ZEnable torch.backends.cuda.matmul.allow_fp16_accumulation, requires pytorch 2.7.0 nightly.)r
  r   r   r   r4   r   r)  r  TzThe full path to an additional state dict to load, this will be merged with the main state dict. Useful for example to add VACE module to a WanVideoModel. You can use DiffusionModelSelector to easily get the path.)
forceInputri   rl   r   rp   s    r   rr   "DiffusionModelLoaderKJ.INPUT_TYPES]  s     (99:LMPY  \I  PJ  KmoAy  fO  DP  Q#,%Lr.s"t-5Mu/vw)2  So  5p  )q
 $  Lc  ,d  !e

 	
r   re   patch_and_loadz4Node for patching torch.nn.Linear with CublasLinear.Tr   Nc                   ^ [         R                  [         R                  [         R                  [         R                  [         R
                  S.n0 n	UR                  U5      =n
(       a   XS'   [        R                  " SU SU
 35        US:X  a  [         R                  U	S'   SU	S'   U(       ah  [        [         R                  R                  R                  S5      (       a*  S[         R                  R                  R                  l        Og[        S	5      e[        [         R                  R                  R                  S5      (       a)  S
[         R                  R                  R                  l        U(       a   [        R                   R#                  S5        O[        R                   R%                  S5        [&        R(                  " SU5      n[+        XUS9nUR                  U5      =n
(       a4  UR-                  U
5        S
Ul        [        R                  " SU SU
 35        US:w  a"  [1        U5      mU4S jnXR2                  S   S'   U4$ )Nr   rI   r   r   r   Tr   r   FFailed to set fp16 accumulation, this requires pytorch 2.7.1 or higherFr   r  )r   r)  r   r	   c                 (   > TR                   " U0 UD6$ rx   ry   r{   s      r   r~   FDiffusionModelLoaderKJ.patch_and_load.<locals>.attention_override_sage  r   r   r   r   )rJ   r   r   rM   r   rK   rH   rY   rZ   r   r   r   r   r   r   r   r   r   r   r   r   r&  r   r   r_   r   )r   r
  r   r   r   r4   r   r)  r   r   rI   r(  rj   r~   r}   s                 @r   r2  %DiffusionModelLoaderKJ.patch_and_loadr  s   --))MMNNMM
	 MM,//5/%*'"LL8J</@HI,,%*%8%8M'"15M-.#u~~**113LMMEI##**B"#kllu~~**113LMMEJ##**BIIMM,'IIl+ 778JJW	(brsMM-0050))%0',E$LL8J</A%IJZ').9MB Zq 567UVxr   r   rx   )r   r   r   r   r   rr   r   r   r   r   r   r2  r   r   r   r   r-  r-  \  s4    
 
 LHHKL&H/r   r-  c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)ModelPatchTorchSettingsi  c                     SSSSSS.4S.0$ )Nrm   re   rk   Fr/  rg   )rj   r   r   rp   s    r   rr   #ModelPatchTorchSettings.INPUT_TYPES  s/    )2  So  5p  )q
  	r   re   rt   zQAdds callbacks to model to set torch settings before and after running the model.Tru   c                     UR                  5       nS nS nU(       a  [        [        R                  R                  R
                  S5      (       aC  UR                  [        R                  U5        UR                  [        R                  U5        U4$ [        S5      e[        [        R                  R                  R
                  S5      (       a#  UR                  [        R                  U5        U4$ [        S5      e)Nc                     [         R                  " S5        S[        R                  R                  R
                  l        g )NzRPatching torch settings: torch.backends.cuda.matmul.allow_fp16_accumulation = TrueTrY   rZ   rJ   r   r   r   r   rj   s    r   patch_enable_fp16_accum>ModelPatchTorchSettings.patch.<locals>.patch_enable_fp16_accum  s'    LLmnAEENN&&>r   c                     [         R                  " S5        S[        R                  R                  R
                  l        g )NzSPatching torch settings: torch.backends.cuda.matmul.allow_fp16_accumulation = FalseFr>  r?  s    r   patch_disable_fp16_accum?ModelPatchTorchSettings.patch.<locals>.patch_disable_fp16_accum  s'    LLnoAFENN&&>r   r   r4  )r   r   rJ   r   r   r   add_callbackr`   
ON_PRE_RUN
ON_CLEANUPr   )r   rj   r   r   r@  rC  s         r   rt   ModelPatchTorchSettings.patch  s    kkm	F	G $u~~**113LMM(()?)?AXY(()?)?AYZ ~ ##kllu~~**113LMM(()?)?AYZ ~ ##kllr   r   Nr   r   r   r   r9  r9    s4      LHeKL%Hr   r9  c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)PatchModelPatcherOrderi  c                 .    SSSS/SSS.4/ SQSS	S.4S
.0$ )Nrm   re   object_patch_firstweight_patch_firstz^Patch the comfy patch_model function to load weight patches (LoRAs) before compiling the modelrg   )enabledr	   r
   r
   zDisabling may help with memory issues when loading large models, when changing this you should probably force model reload to avoid issues!)rj   patch_order	full_loadr   rp   s    r   rr   "PatchModelPatcherOrder.INPUT_TYPES  sP    '%9;O$P^r  @`  Sa  $b"Av  cp  Dq  "r  	r   re   rt   KJNodes/deprecatedzNO LONGER NECESSARY OR FUNCTIONAL, keeping node for backwards compatibility. Use the TorchCompileModelAdvanced to use LoRA with torch.compile.Tc                     U4$ rx   r   )r   rj   rO  rP  s       r   rt   PatchModelPatcherOrder.patch  	    vr   r   N)r   r   r   r   r   rr   r   r   r   r   
DEPRECATEDrt   r   r   r   r   rJ  rJ    s7      LH#H cKJr   rJ  c                   L    \ rS rSrS r\S 5       rSrSrSr	Sr
SrSrSS	 jrS
rg)TorchCompileModelFluxAdvancedV2i  c                     SU l         g NF	_compiledr   s    r   __init__(TorchCompileModelFluxAdvancedV2.__init__  s	    r   c           
      v    SSS/4SSSS.4/ SQS	S	04SS
SS.4SS
SS.4SSSS.4S.SSSSSSS.4SS
SS.4S.S.$ )Nre   inductor
cudagraphsrk   FEnable full graph moderg   rh   zmax-autotunemax-autotune-no-cudagraphszreduce-overheadrh   TzCompile double blockszCompile single blocksEnable dynamic mode)rj   backend	fullgraphmodedouble_blockssingle_blocksdynamicINTr   r      r,   %torch._dynamo.config.cache_size_limitrh   minmaxstepri   2torch._dynamo.config.force_parameter_static_shapes)dynamo_cache_size_limitforce_parameter_static_shapesrl   r   rp   s    r   rr   +TorchCompileModelFluxAdvancedV2.INPUT_TYPES  s     (!+\ :<"+Kc-d!eilu  xA  lB  C&/TNe1f%g&/TNe1f%g )uI^+_` 162aX\fg  u\  8]  0^6?T  _S  BT  6U 	r   re   rt   KJNodes/torchcompileT2Deprecated, use TorchCompileModelAdvanced instead.c
           	          SSK Jn
  UR                  5       nUR                  S5      nU[        R
                  R                  l        U	[        R
                  R                  l        / n U(       a>  [        UR                  5       H%  u  p[        SU5        UR                  SU 35        M'     U(       a2  [        UR                  5       H  u  pUR                  SU 35        M     U
" XX#XtS9  U4$ ! [         a  n[        S5      UeS nAff = f)	Nr   set_torch_compile_wrapperdiffusion_modelz#Adding double block to compile listzdiffusion_model.double_blocks.zdiffusion_model.single_blocks.rj   keysrg  ri  rl  rh  Failed to compile model)comfy_api.torch_helpersr|  r   get_model_objectrJ   _dynamoconfigcache_size_limitrv  	enumeraterj  printappendrk  	Exceptionr   )r   rj   rg  ri  rh  rk  rj  rl  ru  rv  r|  r  r}  compile_key_listiblockes                    r   rt   %TorchCompileModelFluxAdvancedV2.patch  s    EKKM,,->?0G-=Z:	A )/*G*G HHA?C$++.LQC,PQ !I  )/*G*G HHA$++.LQC,PQ !I &Agjq  H u  	A89q@	As   )BC2 2
D<DDr[  N)r   T)r   r   r   r   r^  r   rr   r   r   r   r   rV  r   rt   r   r   r   r   rX  rX    s@       LH%HLJFKr   rX  c                   F    \ rS rSr\S 5       rSrSrSrSr	Sr
SrSS jrS	rg
)TorchCompileModelWanVideoV2i  c                 p    SSS/SS04SSSS.4/ S	QSS04SSS
S.4SSSS.4SSSSSSS.4S.SSSSS.40S.$ )Nre   ra  rb  rh   rk   Frc  rg   rd  rf  TDCompile only transformer blocks, faster compile and less error pronerm  r   r   rn  r,   ro  rp  rj   rg  rh  ri  rl  compile_transformer_blocks_onlyru  rv  rt  rl   r   rp   s    r   rr   'TorchCompileModelWanVideoV2.INPUT_TYPES  s     $'5	:7NO'UG_)`aehqs|g}~%5EZ'[\4=4  ]c  @d  4e,1r!TXbc  qX  4Y  ,Z	 0)  [O  >P  2Q
 	
r   re   rt   rx  Try  c	           	         SSK Jn	  UR                  5       n
U
R                  S5      nU[        R
                  R                  l        U[        R
                  R                  l         U(       a5  / n[        UR                  5       H  u  pUR                  SU 35        M     OS/nU	" XX$XSS9  U
4$ ! [         a  n[        S5      UeS nAff = f)Nr   r{  r}  diffusion_model.blocks.r~  r  )r  r|  r   r  rJ   r  r  r  rv  r  blocksr  r  r   )r   rj   rg  rh  ri  rl  ru  r  rv  r|  r  r}  r  r  r  r  s                   r   rt   !TorchCompileModelWanVideoV2.patch+  s    EKKM,,->?0G-=Z:
	A.#%  )/*@*@ AHA$++.EaS,IJ !B $5"5 %Agjq  H u  	A89q@	As   'AB1 1
C;CCr   NTr   r   r   r   r   rr   r   r   r   r   rV  r   rt   r   r   r   r   r  r    s9    
 
  LH%HLJFKr   r  c                     [         (       a  g Sq Sn U  H[  n[        [        R                  US 5      nUc  M#  [	        [        R                  U[
        R                  R                  U5      5        M]      SS KJn  [
        R                  R                  UR                  5      Ul	        [        R                  " S5        g ! [         a     N#f = f)NT)cast_bias_weightuncast_bias_weightcast_modules_with_vbarresolve_cast_module_with_vbarr   zbKJNodes dynamic-compile: comfy.ops weight cast marked as eager graph break (recompile fix active).)_aimdo_patchedgetattrr   opssetattrrJ   r  r]   comfy_aimdo.torchget_tensor_from_raw_ptrr  rY   rZ   )namesnamefn_ats       r   patch_aimdo_for_compiler  B  s     ~NqEUYYd+>EIItU]]%:%:2%>? '&+mm&;&;C<W<W&X# LLuv  s   34B> >
C
Cc                 L    U  Vs/ s H  nSUR                   ;  PM     sn$ s  snf )Nr   )r  )guard_entriesentrys     r   skip_torch_compile_dictr  U  s$    CPQ=%"%**4=QQQs   !c                 Z    XUS.nU(       a  US:w  a  XS'   U$ U(       a  S[         0US'   U$ )N)rg  rh  rl  rh   ri  guard_filter_fnoptions)r  )rg  ri  rh  rl  use_guard_filterkws         r   build_compile_kwargsr  Z  s?     	IB	!6
 I 
*,CD9Ir   ztorch.compilec                 h   [         R                  U R                  5      nU(       d  U " U0 UD6$ 0 n UR                  5        H[  u  pV[        R
                  R                  U R                  U5      XE'   [        R
                  R                  U R                  XV5        M]     U " U0 UD6UR                  5        H/  u  pV[        R
                  R                  U R                  XV5        M1     $ ! UR                  5        H/  u  pV[        R
                  R                  U R                  XV5        M1     f = frx   )_KJ_COMPILED_BY_MODELrH   	class_objitemsr   r  get_attrset_attr)executorr   r1   compiledorigkeyvalues          r   _kj_apply_torch_compile_wrapperr  j  s    $((););<H(((DA"..*JC,,X-?-?EDIKK  !3!3S@ + ((**,JCKK  !3!3S@ '$**,JCKK  !3!3S@ 's   A6C, ,AD1c                    [         R                  R                  nU R                  UR                  [
        5        U(       d  S/nXX4US.nU V	s0 s H*  o[        R                  " SSU R                  U	5      0UD6_M,     n
n	U
[        U R                  '   U R                  UR                  [
        [        5        XR                  S'   g s  sn	f )Nr}  )rg  r  ri  rh  rl  rj   torch_compile_kwargsr   )r   patcher_extension
WrappersMPremove_wrappers_with_keyAPPLY_MODEL_KJ_COMPILE_KEYrJ   compiler  r  rj   add_wrapper_with_keyr  r   )rj   rg  r  ri  rh  rl  r  r  compile_kwargsr  compiled_moduless              r   kj_set_torch_compile_wrapperr  y  s    ((33J	"":#9#9?K!"!(dovwNkopkodgU]]_1G1G1L_P^__kop)9%++&	z55Hgh2@./ qs   1C	c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SS jrS	rg
)TorchCompileModelAdvancedi  c                     SSS/SS04SSSS.4/ S	QSS04/ S
QSSS.4SSSS.4SSSSSSS.4SSSS.4S.SSSSS.40S.$ )Nre   ra  rb  rh   rk   Frc  rg   rd  )r
   truefalser
   zUse dynamic shape tracing.Tr  rm  r   r   rn  r,   ro  rp  z-Print the compile keys used for torch.compile)rj   rg  rh  ri  rl  r  ru  debug_compile_keysdisable_dynamic_vramz@Disable dynamic VRAM feature as it can cause issues with compilerl   r   rp   s    r   rr   %TorchCompileModelAdvanced.INPUT_TYPES  s     $'5	:7NO'UG_)`aehqs|g}~- &3OP 5>4  ]c  @d  4e,1r!TXbc  qX  4Y  ,Z'0eP  3A  'B '  SU  5V  )W
 	
r   re   rt   rx  z5Advanced torch.compile patching for diffusion models.Tc
           
         U	(       a   UR                  SS9n
OUR                  5       n
U
R	                  S5      nU[
        R                  R                  l         / nU(       a  / SQnU HR  n[        X5      (       d  M  [        X5      n[        [        U5      5       H  nUR                  SU SU 35        M     MT     U(       d  [        R                  " S5        O?U(       a8  [        R                  " S	5        U H  n[        R                  " S
U 35        M     U(       d  S/nSSS S.n UU   nU	(       d"  [        U
SS 5      " 5       (       a
  [#        5         [%        SXS.['        X$X55      D6  U
4$ ! [         a*    [        R                  " S5        UR                  5       n
 GNf = f! [         a    [!        SU 35      ef = f! [(         a  n[+        S5      UeS nAff = f)NT)r  ztThis ComfyUI version do not support disabling dynamic VRAM through a node. This may cause issues with torch.compile.r}  )	rj  rk  layerstransformer_blocksr  visual_transformer_blockstext_transformer_blockspatch_blockspixel_blockszdiffusion_model..zVNo known transformer blocks found to compile, compiling entire diffusion model insteadz,TorchCompileModelAdvanced: Compile key list:z - F)r  r  r
   zInvalid dynamic arg 
is_dynamicc                      grZ  r   r   r   r   rE   1TorchCompileModelAdvanced.patch.<locals>.<lambda>  s    Ur   )rj   r  r  r   )r   	TypeErrorrY   warningr  rJ   r  r  r  r   r  rangelenr  rZ   KeyError
ValueErrorr  r  r  r  r   )r   rj   rg  rh  ri  rl  ru  r  r  r  r  r}  r  layer_types
layer_namer  r  r  
dynamic_kvr  s                       r   rt   TorchCompileModelAdvanced.patch  s   "KKK5
 A,,->?0G-	A!. C"-J;;!(!E!&s6{!3A,336FzlRSTUSV4WX "4 #.
 (OO$|}'LL!OP/s3%[1  0##4"5 "&EJC$W- (GA|],S,U,U')(  Dq  DK_`gox  LC  D uO  "  !W  XKKM"<  C #7y!ABBC  	A89q@	AsI   E5  G >B.G -F, 2A G 50F)(F),GG 
G#GG#r   Nr   )r   r   r   r   r   rr   r   r   r   r   r   rt   r   r   r   r   r  r    s4    
 
& LH%HIKL+r   r  c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS rS	rg
)TorchCompileModelQwenImagei  c                 `    SSSS/SS04SSSS	.4/ S
QSS04SSSS	.4SSSS	.4SSSSSSS.4S.0$ )Nrm   re   ra  rb  rh   rk   Frc  rg   rd  rf  Tr  rm  r   r   rn  r,   ro  rp  r  r   rp   s    r   rr   &TorchCompileModelQwenImage.INPUT_TYPES  s     #'5	:7NO'UG_)`aehqs|g}~%5EZ'[\4=4  ]c  @d  4e,1r!TXbc  qX  4Y  ,Z

 
	
r   re   rt   rx  Try  c           	      `   SSK Jn  UR                  5       n	U	R                  S5      n
U[        R
                  R                  l         U(       a5  / n[        U
R                  5       H  u  pUR                  SU 35        M     OS/nU" XX$XSS9  U	4$ ! [         a  n[        S5      UeS nAff = f)Nr   r{  r}  #diffusion_model.transformer_blocks.r~  r  )r  r|  r   r  rJ   r  r  r  r  r  r  r  r   )r   rj   rg  rh  ri  rl  ru  r  r|  r  r}  r  r  r  r  s                  r   rt    TorchCompileModelQwenImage.patch  s    EKKM,,->?0G-
	A.#%  )/*L*L MHA$++.QRSQT,UV !N $5"5 %Agjq  H u  	A89q@	As   AB 
B-B((B-r   Nr  r   r   r   r  r    s9    
 
 LH%HLJFKr   r  c                   @    \ rS rSrS r\S 5       rSrSrSr	Sr
S rS	rg
)TorchCompileVAEi  c                      SU l         SU l        g rZ  )_compiled_encoder_compiled_decoderr]  s    r   r^  TorchCompileVAE.__init__  s    !&!&r   c           
      H    SSSS/4SSSS.4/ S	QS
S
04SSSS.4SSSS.4S.0$ )Nrm   r   ra  rb  rk   Frc  rg   rd  rh   TzCompile encoderzCompile decoder)r  rg  rh  ri  compile_encodercompile_decoderr   rp   s    r   rr   TorchCompileVAE.INPUT_TYPES  sf    #!+\ :<"+Kc-d!eilu  xA  lB  C(1tPa3b'c(1tPa3b'c  	r   r  r  rx  Tc                    U(       ax  U R                   (       dg  Sn[        UR                  S5      (       a  Sn [        UR                  U[        R
                  " [        UR                  U5      UUUS95        SU l         U(       a{  U R                  (       dj  Sn	[        UR                  S5      (       a  Sn	 [        UR                  U	[        R
                  " [        UR                  U	5      UUUS95        SU l	        U4$ U4$ ! [         a  n[        S5      UeS nAff = f! [         a  n[        S5      UeS nAff = f)Nencodertaesd_encoderri  rh  rg  Tr  decodertaesd_decoder)
r  r   first_stage_modelr  rJ   r  r  r  r   r  )
r   r  rg  ri  rh  r  r  encoder_namer  decoder_names
             r   r  TorchCompileVAE.compile  s:   ))(300/BB#2LI--$#C$9$9<H!%&/$+		 .2D* ))(300/BB#2LI--$#C$9$9<H!%&/$+		 .2D* ww- ! I&'@AqHI( ! I&'@AqHIs1   AD 8AD# 
D DD #
D>-D99D>)r  r  Nr   r   r   r   r^  r   rr   r   r   r   r   r  r   r   r   r   r  r    s6    '   LH%HL)r   r  c                   @    \ rS rSrS r\S 5       rSrSrSr	Sr
S rS	rg
)TorchCompileControlNeti7  c                     SU l         g rZ  r[  r]  s    r   r^  TorchCompileControlNet.__init__8  s	    r   c                 0    SSSS/4SSSS.4/ S	QS
S
04S.0$ )Nrm   CONTROL_NETra  rb  rk   Frc  rg   rd  rh   )
controlnetrg  rh  ri  r   rp   s    r   rr   "TorchCompileControlNet.INPUT_TYPES;  sJ    "2!+\ :<"+Kc-d!eilu  xA  lB  C	  	r   r  r  rx  Tc                     U R                   (       d0   [        R                  " UR                  X4US9Ul        SU l         U4$ U4$ ! [         a  nSU l         [        S5      UeS nAff = f)Nr   TFr  )r\  rJ   r  control_modelr  r   )r   r  rg  ri  rh  r  s         r   r  TorchCompileControlNet.compileI  sv    ~~E ,1==9Q9QX\  |C  ,D
(!%
 ~
~	  E!&"#<=1DEs   ,A 
A'A""A'r[  Nr  r   r   r   r	  r	  7  s6       $LH%HLr   r	  c                   F    \ rS rSr\S 5       rSrSrSrSr	Sr
SrSrS	 rS
rg)WanVideoTeaCacheKJiW  c                 d    SSSSSSSSS	.4SS
SSSSS	.4SSSSSSS	.4SS/SSS.4/ SQSSS.4S.0$ )Nrm   re   FLOATg?r         $@MbP?a  Threshold for to determine when to apply the cache, compromise between speed and accuracy. When using coefficients a good value range is something between 0.2-0.4 for all but 1.3B model, which should be about 10 times smaller, same as when not using coefficients.rp  g?      ?{Gz?z7The start percentage of the steps to use with TeaCache.z5The end percentage of the steps to use with TeaCache.main_deviceoffload_devicezDevice to cache torg   )r	   z1.3B14Bi2v_480i2v_720r  zCoefficients for rescaling the relative l1 distance, if disabled the threshold value should be about 10 times smaller than the value used with coefficients.)rj   rel_l1_threshstart_percentend_percentcache_devicecoefficientsr   rp   s    r   rr   WanVideoTeaCacheKJ.INPUT_TYPESX  s     #")uSQU_d  r{  ,|  "}")s3s\`  ng  ,h  "i 'SSZ^  lc  *d   e"/1A!BP`  nB  EC  !D!R`i  wU  UV  !W	
 		
r   re   r?  patch_teacacherR  TzbDEPRECATED, use the native EasyCache or alternative custom node that's up to date instead of this.c                     U4$ rx   r   )r   rj   r!  r"  r#  r$  r%  s          r   r'  !WanVideoTeaCacheKJ.patch_teacachem  rU  r   r   N)r   r   r   r   r   rr   r   r  r   r   rV  r   r   r'  r   r   r   r   r  r  W  s>    

 

 LLH#HJzKLr   r  )
apply_ropec           	        ^ ^	^
^^ / UR                   SS QT R                  PT R                  P7u  m	mmm
U	U
UUU 4S jnU" U5      u  pVn[        XVU5      u  pV[	        XVT R
                  T R                  5      n[        R                  R                  R                  R                  UR                  T	TTT
-  5      UR                  T	TTT
-  5      UT R                  US9nT R                  U5      nX-  nU$ )z|
Args:
    x(Tensor): Shape [B, L, num_heads, C / num_heads]
    freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
Nr-   c                   > TR                  TR                  U 5      5      R                  TTTT5      nTR                  TR	                  U 5      5      R                  TTTT5      nTR                  U 5      R                  TTTT-  5      nXU4$ rx   )norm_qr   r?   norm_kr   r   )	r2   r   r   r   rB   dnrq   r   s	       r   qkv_fn3modified_wan_self_attention_forward.<locals>.qkv_fn|  sx    KKq	"''1a3KKq	"''1a3FF1INN1aQ'Qwr   rD   r   )rN   	num_headshead_dimr*  get_feta_scores
num_framesenhance_weightr   ldmmodules	attentionr   r?   o)r   r2   freqsr   r1  r   r   r   feta_scoresrB   r/  r0  rq   s   `        @@@@r   #modified_wan_self_attention_forwardr?  s  s     =!''"1+<t~~<t}}<JAq!Q  QiGA!aE"DA!!9L9LMK		##77FF1aQFF1aQ.. 3 	8 	
A 	q	AAHr   )	rearrangec           	      *   XpeUR                   S:X  a  UR                  u  pxpIOGUR                   S:X  a7  UR                  u  pxn
X-  n	UR                  XxXI5      nUR                  XxXI5      nWU-  n[        USX+UW	S9n[        USX+XIS9n[	        XXU5      $ )N   rG   zB (T S) N C -> (B S) N T C)TSNC)rP   rN   r?   r@  
feta_score)queryr  r7  r8  r4  img_qimg_kBSTr5  
hidden_dimspatial_dimquery_image	key_images                 r   r6  r6    s    5zzQ%*[["y(	q!KKz* 

1)6

1)6
"K+zIYaK +zII khNSSr   c                    US-  nX-  n XR                  SS5      -  nUR                  [        R                  5      nUR	                  SS9nUR                  SX35      n[        R                  " X6R                  S9R                  5       nUR                  S5      R                  UR                  S   SS5      nUR                  US5      nX3-  U-
  n	UR                  SS9U	-  n
U
R                  5       X4-   -  nUR                  SS	9nU$ )
Ng      r=   dim)r   r   )r,   r-   r,   )rq  )r0   rL   rJ   rK   softmaxrR   eyer   boolrQ   expandrN   masked_fillsummeanclamp)rO  rP  r5  r7  r8  scale	attn_temp	diag_maskattn_wo_diagnum_off_diagmean_scoresenhance_scoress               r   rG  rG    s   dNE%K11"b99IU]]+I!!b!)I !!"j=I 		*-=-=>CCEI##A&--iooa.@"bII ((A6L *Z7L""v".=K %%':+FGN#))a)0Nr   c                   $    \ rS rSrS rSS jrSrg)WanAttentionPatchi  c                     Xl         X l        g rx   r7  r8  r   r7  weights      r   r^  WanAttentionPatch.__init__      $$r   Nc                 <   ^  U 4S jn[         R                  " X15      $ )Nc                 f   > TR                   U l         TR                  U l        [        U /UQ70 UD6$ rx   )r7  r8  r?  self_moduler   r1   r   s      r   wrapped_attention4WanAttentionPatch.__get__.<locals>.wrapped_attention  s2    %)__K")-)<)<K&6{TTTVTTr   types
MethodTyper   objobjtyperp  s   `   r   __get__WanAttentionPatch.__get__  s    	U  177r   r8  r7  rx   r   r   r   r   r^  rx  r   r   r   r   re  re        %8r   re  c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS	 rS
rg)WanVideoEnhanceAVideoKJi  c           
      *    SSSSS04SSSS	S
SS.4S.0$ )Nrm   re   LATENTri   !Only used to get the latent countr         @r   r  r  Strength of the enhance effectrp  rj   latentri  r   rp   s    r   rr   #WanVideoEnhanceAVideoKJ.INPUT_TYPES  sF     ##i1T%UV"CV[  iI  %J  K
 	
r   re   r?  enhanceKJNodes/wan1https://github.com/NUS-HPC-AI-Lab/Enhance-A-VideoTc           	          US:X  a  U4$ US   R                   S   nUR                  5       nSUR                  ;  a  0 UR                  S'   X%R                  S   S'   UR                  S5      n[	        UR
                  SS 5      n[        UR                  5       Hp  u  p[        XB5      R                  U	R                  U	R                  5      n
Ub#  [        R                  " XS   US	   US
   US   S9n
UR                  SU S3U
5        Mr     U4$ )Nr   samplesr-   r   r8  r}  compile_settingsri  rl  rh  rg  )ri  rl  rh  rg  r  z.self_attn.forward)rN   r   r   r  r  rj   r  r  re  rx  	self_attn	__class__rJ   r  add_object_patch)r   rj   ri  r  r7  r   r}  r  idxr  patched_attns              r   r  WanVideoEnhanceAVideoKJ.enhance  s8   Q;8OI&,,Q/
kkm (A(AA?AK%%&;<MS!!"789IJ%667HI"5;;0BDI#O$:$:;JC,Z@HHZ_ZiZijL+$}}\QW@Xbrs|b}  JZ  [f  Jg  qA  BK  qL   M((+B3%GY)Z\hi < ~r   r   Nr   r   r   r   r   rr   r   r  r   r   r   r   r  r   r   r   r   r~  r~    s9    
 
 LLHHEKLr   r~  )apply_rotary_emb)GuideAttentionMask_attention_with_guide_maskc           
      l   U R                  U5      nUc  UOUnU R                  U5      nU R                  U5      n	U R                  U5      nU R	                  U5      nUb  [        Xt5      n[        Xc  UOU5      n[        XxU R                  U R                  U R                  5      n
UcI  [        R                  R                  R                  R                  XxXR                  U R                  US9nO[         b6  [#        U[         5      (       a!  [%        XxXR                  X0R                  US9nOH[        R                  R                  R                  R'                  XxXR                  X0R                  US9nU R(                  b  U R)                  U5      nUR*                  u  pnUR-                  XU R                  U R.                  5      nS[0        R2                  " U5      -  nUUR5                  S5      -  nUR-                  XU R                  U R.                  -  5      nU R7                  U5      U
-  $ )N)rS   r   r  r=   )to_qto_kto_vq_normk_normr  r6  r7  r8  rD   r   r9  r:  r;  r   rS   _GuideAttentionMaskr   _ltx_attn_with_guide_maskoptimized_attention_maskedto_gate_logitsrN   r?   rC   rJ   sigmoidrQ   to_out)r   r2   contextr7   pek_per   r   r   r   r>  r3   gate_logitsrB   rA   rU   gatess                    r   ltxv_feta_forwardr  	  s   		!A?aG		'A		'AAAAA	~Q#Ql=!!9L9LdjjYK|ii))==aAzzbfbubu  L_=  `		(Z>Q-R-R'aTReRe  |O  Pii))DDQ1jjZ^  pC  pC  YlD  m&))!,))ahhqTZZ7emmK00EOOB''hhqTZZ$--78;;sk))r   c                   $    \ rS rSrS rSS jrSrg)LTXCrossAttentionPatchi*  c                     Xl         X l        g rx   rg  rh  s      r   r^  LTXCrossAttentionPatch.__init__+  rk  r   Nc                 <   ^  U 4S jn[         R                  " X15      $ )Nc                 f   > TR                   U l         TR                  U l        [        U /UQ70 UD6$ rx   )r7  r8  r  rn  s      r   rp  9LTXCrossAttentionPatch.__get__.<locals>.wrapped_attention1  s2    %)__K")-)<)<K&$[B4B6BBr   rr  ru  s   `   r   rx  LTXCrossAttentionPatch.__get__/  s    	C  177r   rz  rx   r{  r   r   r   r  r  *  r|  r   r  c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS	 rS
rg)LTXVEnhanceAVideoKJi7  c           
      *    SSSSS04SSSS	S
SS.4S.0$ )Nrm   re   r  ri   r  r  g      @r         Y@r  r  rp  r  r   rp   s    r   rr   LTXVEnhanceAVideoKJ.INPUT_TYPES8  sF     ##i1T%UV"CW\  jJ  %K  L
 	
r   re   r?  r  zKJNodes/ltxvr  Tc                    US:X  a  U4$ US   R                   S   nUR                  5       nSUR                  ;  a  0 UR                  S'   X%R                  S   S'   UR                  S5      n[	        UR
                  5       HJ  u  px[        XB5      R                  UR                  UR                  5      n	UR                  SU S3U	5        ML     U4$ )	Nr   r  r-   r   r8  r}  r  z.attn1.forward)rN   r   r   r  r  r  r  rx  attn1r  r  )
r   rj   ri  r  r7  r   r}  r  r  patched_attn1s
             r   r  LTXVEnhanceAVideoKJ.enhanceI  s    Q;8OI&,,Q/
kkm (A(AA?AK%%&;<MS!!"789IJ%667HI#O$F$FGJC2:FNNu{{\a\k\klM((+NseSa)bdqr H ~r   r   Nr  r   r   r   r  r  7  s9    
 
 LLHHEKLr   r  c                     U R                  U R                  U5      5      nU R                  U5      n[        R                  R
                  R                  R                  XXPR                  US9R                  S5      $ )Nr3  r-   )
r.  r   r   r   r9  r:  r;  r   r4  flatten)r   rH  r  r   r   r   s         r   _wan_compute_attentionr  Z  su    DFF7O$AwA99&&::5Qnn  sF:  G  O  O  PQ  R  Rr   c                 8    [        XX$5      n[        XX45      nXV4$ rx   )r  )r   rH  context_positivenag_contextr   
x_positive
x_negatives          r   wan_nag_attentionr  _  s$    '5E[J'[VJ!!r   c                 @   U R                   (       aE  UR                  U R                  S-
  5      R                  5       R	                  XR                  S9nAO/X R                  S-
  -  nAXR                  -  R                  U5      n[        R                  " USSSS9n[        R                  " USSSS9nXT-  n[        R                  " USS9  X`R                  :  nAX@R                  -  US-   -  nAAUR                  [        R                  " XxS	5      5        AAU R                   (       a9  UR                  U5      R                  U R                  5      R	                  U5      $ UR                  U R                  5        UR	                  USU R                  -
  -  5      $ )
Nr,   )alphar=   T)prT  keepdimr  )nangHz>r  )inplacemul_	nag_scaleneg_add_sub_rJ   normnan_to_num_nag_tauwhere	nag_alpha)	r   r  r  nag_guidancenorm_positivenorm_guidancer]  r7   
adjustments	            r   normalized_attention_guidancer  d  sV   ||!t~~'9:??AFFzYgYgFh!^^a%78"^^399,GJJzQBEMJJ|qb$GM)E	e&<<D,,.=43GHJ}ekk$C89j||  ,11$..AFFzRR$..)  q4>>/A!BCCr   c                 X   U R                   S:X  aK  UR                  S   S:X  a  XpeSu  pxOG[        R                  " USSS9u  pW[        R                  " USSS9u  phOU R                   S:X  a  XpeSu  pxU R	                  U R                  W5      5      n	U R                  n
U R                   S:X  a   U
R                  UR                  S   SS5      n
A[        X	WXS9u  pAA	[        XU5      nAAWb  Wb  U R	                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U5      n[        R                  R                  R                   R#                  XUU R$                  US	9n[        R&                  " UU/SS9nOUnU R)                  U5      $ )
Nrh   r   r,   )NNr-   rS  batchr   r3  )
input_typerN   rJ   chunkr-  r   r  repeatr  r  r.  r   r   r   r9  r:  r;  r   r4  catr<  )r   r2   r  r   r1   x_poscontext_posx_negcontext_negq_posr  r  r  	x_pos_outq_negk_negv_neg	x_neg_outs                     r   wan_crossattn_forward_nagr    s   )#==q !";!+E; ;;q!3LE',{{7A1'E$K	G	#{' KKu&E""K'!!((QA>.tK~JU-d
KIJ [4DFF5M*DFF;/0{#II%%//CCERW_c_m_m  DWC  X	IIy),!466!9r   c                    US S 2S U24   nUS S 2US 24   nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R	                  U5      n	[
        R                  R                  R                  R                  XxXR                  US9n
AAA	AUR                  S   S:X  a1  [        R                  " USSS9u  p[        R                  " USSS9u  pOUnS nU R                  U R                  U5      5      n[        XXR                  US9u  nnAA[!        XU5      nAAUb  U R                  U R                  W5      5      nU R#                  U R%                  U5      5      nU R'                  U5      n[
        R                  R                  R                  R                  UUUU R                  US9n[        R(                  " X/SS9nU R+                  X-   5      $ )Nr3  r   r-   rS  r  )r-  r   
norm_k_imgk_imgv_imgr   r9  r:  r;  r   r4  rN   rJ   r  r  r  r  r.  r   r   r  r<  )r   r2   r  context_img_lenr   r1   context_imgq_imgr  r  img_xx_real_negativer  context_negativer   r  r  q_real_negativek_real_negativev_real_negatives                       r   wan_i2v_crossattn_forward_nagr    s   !-o--.Ka))*GKKq	"EOODJJ{34EJJ{#EII'';;E%WeWe  |O;  PEue[}}Q1"[[A15-2[[!-K**"DFF1IA.t8HJZJZ  qD  EJ
	%d
CAJ#++dff_&=>++dff-=&>?&&!12))++55II/[jl{  DH  DR  DR  h{I  |IIq*266!)r   c                   (    \ rS rSrSS jrSS jrSrg)WanCrossAttentionPatchi  c                 X    Xl         X l        X0l        X@l        XPl        X`l        Xpl        g rx   )r  r  r  r  i2vr  r  )r   r  r  r  r  r  r  r  s           r   r^  WanCrossAttentionPatch.__init__  s&    """$r   Nc                 <   ^  U 4S jn[         R                  " X15      $ )Nc                 .  > TR                   U l         TR                  U l        TR                  U l        TR                  U l        TR                  U l        TR
                  U l        TR                  (       a  [        U /UQ70 UD6$ [        U /UQ70 UD6$ rx   )	r  r  r  r  r  r  r  r  r  rn  s      r   rp  9WanCrossAttentionPatch.__get__.<locals>.wrapped_attention  s~    &*&6&6K#$(NNK!$(NNK!"&,,K%)__K""&,,Kxx4[R4R6RR0NtNvNNr   rr  ru  s   `   r   rx  WanCrossAttentionPatch.__get__  s    
	O  177r   )r  r  r  r  r  r  r  )Frh   Trx   r{  r   r   r   r  r    s    8r   r  c                   F    \ rS rSr\S 5       rSrSrSrSr	Sr
SrSS	 jrS
rg)WanVideoNAGi  c                 d    SSSSSSSSS	.4SS
SSSSS	.4SSSSSSS	.4S.SS/SS04SSSS.4S.S.$ )Nre   )CONDITIONINGr  g      &@r   r  r  z$Strength of negative guidance effectrp        ?r  zMixing coefficient in that controls the balance between the normalized guided representation and the original positive representation.g      @r  zgClipping threshold that controls how much the guided attention can deviate from the positive attention.)rj   conditioningr  r  r  rh   r  ri   zType of the model inputrk   Fz}If true, modifies tensors in place to save memory. Leads to different numerical results which may change the output slightly.rg   r  r  rl   r   rp   s    r   rr   WanVideoNAG.INPUT_TYPES  s     $ 1%4E[`  nT  (U  V%4CY^  lt  (u  v#SW\  jS  &T  U !*73iAZ5[\%5  FE  (F  G
 	
r   re   r?  rt   r  z;https://github.com/ChenDarYen/Normalized-Attention-GuidanceTc                 t   US:X  a  U4$ [         R                  " 5       n[         R                  " 5       n	UR                  5       n
U
R	                  S5      nUR
                  R                  U5        UR                  US   S   R                  X5      5      n[        [        UR                  R                  5      R                  5      nSU;   a  SOSn[        UR                  5       HL  u  nn[        XXEXUS9R                  UR                   UR"                  5      nU
R%                  SU S3U5        MN     U
4$ )	Nr   r}  	WAN21_I2VTFr  r  z.cross_attn.forward)mmget_torch_device
unet_dtyper   r  text_embeddingrL   strtyperj   model_configr   r  r  r  rx  
cross_attnr  r  )r   rj   r  r  r  r  r  r  r   rI   r   r}  r  type_strr  r  r  r  s                     r   rt   WanVideoNAG.patch  s>   >8O$$&kkm%667HI&&))&1!00a1C1F1Fv1UVtEKK445>>?!X-d5#O$:$:;JC1'iZ]  F  G  O  O  PU  P`  P`  bg  bq  bq  rL((+B3%GZ)[]ij <
 ~r   r   N)rh   F)r   r   r   r   r   rr   r   r  r   r   r   r   rt   r   r   r   r   r  r    s9    
 
  LLHHOKLr   r  c                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS rS	rg
)SkipLayerGuidanceWanVideoi  c           
      :    SSSSSS.4SSS	S
SS.4SS
S	S
SS.4S.0$ )Nrm   re   r  10F)rh   	multiliner  皙?r   r  r  rh   rq  rr  rs  )rj   r  r"  r#  r   rp   s    r   rr   %SkipLayerGuidanceWanVideo.INPUT_TYPES  sK    k(0dQV2W'X/6CPS\_in8o.p-4#cZ]gl6m,n  	r   re   slgTzLSimplified skip layer guidance that only skips the uncond on selected blockszadvanced/guidancec                 J  ^^ UU4S jnUR                  S5       Vs/ s H  n[        UR                  5       5      PM     nnU Vs/ s H  n[        U5      PM     nn[        R                  " SU 35        UR                  5       n	U H  n
U	R                  S   R                  5       nSU;  a  0 US'   OUS   R                  5       US'   SUS   ;  a	  0 US   S'   OUS   S   R                  5       US   S'   SU
4nX[S   S   U'   XR                  S'   M     U	4$ s  snf s  snf )Nc                 L  > UR                  S0 5      nUS   nU(       d  [        S5      eT	US   s=::  a  T::  a  O  OU S   R                  S   S:X  a  U S   S   R                  S5      nU S   S   R                  S5      U S	   S   R                  S5      U S
   S   R                  S5      U S   S   R                  S5      S.nU" U5      n[        R
                  " XFS   /SS9U S	   U S
   U S   S.nU$ UR                  S5      S/:X  a
  U" U 5      nU$ U n U$ U" U 5      nU$ )Nr   original_blockzktransformer_options not found in extra_args, currently SkipLayerGuidanceWanVideo only works with TeaCacheKJcurrent_percentimgr   r-   r,   txtvecr  )r"  r#  r$  r  rS  cond_or_uncond)rH   r  rN   rQ   rJ   r  )
r   
extra_argsr   r   prev_img_uncondnew_args	block_outr3   r#  r"  s
           r   skip+SkipLayerGuidanceWanVideo.slg.<locals>.skip&  sl   ",..1F"K'(89N&   "O  P  P 34E FU+U;$$Q'1,&*5k!n&>&>q&AO  $E{1~77:#E{1~77:#E{1~77:"4jm55a8	 H !/x 8I  %yy/U;K)LRST#E{#E{"4j	C J +../?@QCG,T2
 J # J %T*Jr   ,z#Selected blocks to skip uncond on: r   patches_replaceditdouble_block)splitintstriprY   rZ   r   r   copy)r   rj   r"  r#  r  r*  r2   
block_listr  r  rB   r   r  s     ``         r   r  SkipLayerGuidanceWanVideo.slg%  s9    	D /5ll3.?@.?c!'')n.?
@",-*Q#a&*-:6(CDKKMAOO,ABGGIM 535/03@AR3S3X3X3Z/0M*;<<:</07:GHY:Z[`:a:f:f:h/07#Q'E=A+,U3E:5BOO12! & u3 A-s   #DD r   N)r   r   r   r   r   rr   r   r   r   r   rV  r   r  r   r   r   r   r  r    s9      LHL`KJ"H<r   r  c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)CFGZeroStarAndInitic  c                 &    SSSSS04SSSSS	.4S
.0$ )Nrm   re   rk   rh   Trm  r   zXfor zero init, starts from 0 so first step is always zeroed out if use_zero_init enabledrh   rq  ri   )rj   use_zero_initzero_init_stepsr   rp   s    r   rr   CFGZeroStarAndInit.INPUT_TYPESd  s>    '&/)T1B%C(-1Q  Tn  0o  (p  	r   re   rt   z+https://github.com/WeichenFan/CFG-Zero-starru   Tc                 \   ^^ UU4S jnUR                  5       nUR                  U5        U4$ )Nc                   > U S   nU S   nU S   S   S   nX2S   :H  R                  5       n[        U5      S:  a  UR                  5       nO@[        [        U5      S-
  5       H#  nX6   US   -
  X6S-      US   -
  -  S::  d  M!  Un  O   SnUT::  a  T(       a  US-  $ U S   nU S	   nUR                  S   n	UR                  U	S
5      n
UR                  U	S
5      n[        R                  " X-  SSS9n[        R                  " US-  SSS9S-   nX-  nUR
                  " U	/S/[        UR                  5      S-
  -  Q76 nX~-  XX~-  -
  -  -   nU$ )Ncondtimestepr   r   sample_sigmasr   r,   uncond
cond_scaler=   T)rT  r  r-   g:0yE>)nonzeror  itemr  rN   r?   rJ   rZ  )r   r?  r@  sigmasmatched_step_indexcurrent_step_indexr  rB  rC  
batch_sizepositive_flatnegative_flatdot_productsquared_normr  
noise_predr:  r;  s                   r   cfg_zerostar.CFGZeroStarAndInit.patch.<locals>.cfg_zerostarr  s   <DJ'H/*+@A/RF"(QK"7!@!@!B%&*%7%<%<%>"s6{Q/A	HQK/Fq5MHQK4OPTUU-.* 0
 *+&"o5=ax(^Fl+JAJ IIj"5M"KK
B7M))M$AqRVWK 99]a%7QMPTTL.EJJzJaSC

Oa4G-HJE*v~8M*NNJr   )r   set_model_sampler_cfg_function)r   rj   r:  r;  rO  r  s     ``  r   rt   CFGZeroStarAndInit.patchq  s+    !	F KKM	((6ur   r   N)r   r   r   r   r   rr   r   r   r   r   r   rt   r   r   r   r   r7  r7  c  s4      LH?K%HL&r   r7  c                   j    \ rS rSr\S 5       rS rS rS rS r	\\\\	S.r
\S 5       r\S	 5       rS
rg)GGUFLoaderKJi  c                     [         R                  " S5      n[         R                  " S5      nU Vs/ s H  nSUR                  5       ;   d  M  UPM     nn[        R
                  " SSSS[        R                  R                  SUS	9[        R                  R                  S
X-   S/-   SSS9[        R                  R                  S/ SQSS9[        R                  R                  S/ SQSS9[        R                  R                  SSS9[        R                  R                  SSSS9[        R                  R                  S/ SQSSS9/[        R                  R                  5       /S9$ s  snf ! [         a    / n/ n GN*f = f)N	unet_ggufr  r	  rT  r   zzLoads a GGUF model with advanced options, requires [ComfyUI-GGUF](https://github.com/city96/ComfyUI-GGUF) to be installed.Tr
  )r  extra_model_namenonezRAn extra gguf model to load and merge into the main model, for example VACE module)r  rh   ri   dequant_dtype)rh   targetrK   rM   r   rh   )r  rh   patch_dtypepatch_on_deviceF)rh   r   r   rg   attention_override)rX  sdpar   xformers	flashattnz\Overrides the used attention implementation, requires the respective library to be installednode_idcategorydescriptionis_experimentalinputsoutputs)r   r   r  r  r   SchemaComboInputBooleanModelOutput)clsgguf_modelsr  r  s       r   define_schemaGGUFLoaderKJ.define_schema  s   	'&88EK$0$B$B?$S!0E$b0E1XYX_X_XaIaQ0E!$b
 yy", U |[A1;;^bhai;isy  DX  Y8o  zC  D}6m  xA  B

  !2E B

  !;U  Ux   y3=r  }C  Mk  l	 XX__&(
 	
 %c 	'K$&!	's(   0E/ E*E*E/ *E/ /F Fc                     [         R                  R                  R                  R                  nUR
                  " U0 UD6$ rx   )r   r9  r:  r;  r   rz   r{   s       r   attention_override_pytorch'GGUFLoaderKJ.attention_override_pytorch  s3    		))33EE(($9&99r   c                     [         R                  R                  R                  R                  nUR
                  " U0 UD6$ rx   )r   r9  r:  r;  rV   rz   r{   s       r   r~   $GGUFLoaderKJ.attention_override_sage  s3    		))33BB(($9&99r   c                     [         R                  R                  R                  R                  nUR
                  " U0 UD6$ rx   )r   r9  r:  r;  attention_xformersrz   r{   s       r   attention_override_xformers(GGUFLoaderKJ.attention_override_xformers  s3    		))33FF(($9&99r   c                     [         R                  R                  R                  R                  nUR
                  " U0 UD6$ rx   )r   r9  r:  r;  r   rz   r{   s       r   r   %GGUFLoaderKJ.attention_override_flash  s3    		))33CC(($9&99r   )r^  r   r_  r`  c                    [         R                  R                  5        H[  u  pUR                  S5      (       d  UR                  S5      (       d  M3  [	        US5      (       d  MF  [	        US5      (       d  MY  Us  $    [
        R                  R                  [        R                  S   S   S   S5      nSSSSX3R                  5       4 H  n [        R                  " U5      nUs  $    [        S	5      e! [         a     M8  f = f)
z*Import GGUF module with version validationzComfyUI-GGUFzcomfyui-ggufr  nodescustom_nodesr   zcustom_nodes.ComfyUI-GGUFzcustom_nodes.comfyui-ggufzeCompatible ComfyUI-GGUF not found. Please install/update from: https://github.com/city96/ComfyUI-GGUF)sysr:  r  endswithr   ospathjoinr   folder_names_and_pathsr  	importlibimport_moduler   )rn  r  mod	gguf_pathmodule_namemodules         r   _get_gguf_moduleGGUFLoaderKJ._get_gguf_module  s     ))+HC||N++s||N/K/K3&&73+@+@J ,
 GGLL!D!D^!TUV!WXY!Z\jk	*,GYtv  BQ  BQ  BS  TK"00= T Q
 	
  s   C55
DDc                   ^ U R                  5       nUR                  R                  5       mU4S jn	U	" SU5        U	" SU5        0 n
[        R                  " SU5      n UR
                  R                  U5      u  pUGb  US:w  Ga
  UR                  S5      (       a6  [        R                  " SU5      n UR
                  R                  U5      u  pOSUR                  5       ;   a  [        R                  " SU5      n[        R                  R                  U5      n[        R                  R                  U5      nUS	:X  a2  [        R                  R                  UUS
0SS9n[!        U5      S:  a  UnO[#        S5      eUR%                  U5        [        R&                  R)                  UST0U
R+                  S0 5      S9nUc  [-        SU 35      eUR.                  R0                  R3                  U5      nUUl        X`R6                  ;   a  U R6                  U   UR8                  S   S'   U(       ah  [;        [<        R>                  R@                  RB                  S5      (       a*  S[<        R>                  R@                  RB                  l"        Og[-        S5      e[;        [<        R>                  R@                  RB                  S5      (       a)  S[<        R>                  R@                  RB                  l"        [F        RH                  " U5      $ ! [         a    UR
                  R                  U5      n GNf = f! [         a    UR
                  R                  U5      n GNf = f)Nc                    > US:X  a  [        TR                  U S 5        g US:X  a  [        TR                  X5        g [        TR                  U [        [        U5      5        g )Nrh   rZ  )r  Linearr  rJ   )attrr  r  s     r   set_linear_dtype.GGUFLoaderKJ.execute.<locals>.set_linear_dtype  sH    	!

D$/("

D0

D'%*?@r   rY  r[  unetrX  z.ggufr	  r  zmodel.diffusion_model.r  Tr  r   z%Extra model must also be a .gguf filecustom_operationsr  )r   r  z'ERROR: Could not detect model type of: r   r   r   r   F)%r  r  GGMLOpsr   get_full_pathloadergguf_sd_loaderr  r  r  r   r  r   r"  r#  r$  r  r  r!  r   r%  rH   r   r~  GGUFModelPatcherr   r\  ATTENTION_OVERRIDESr   r   rJ   r   r   r   r   r   
NodeOutput)rn  r
  rW  rY  r[  r\  r]  r   
gguf_nodesr  extrar  r   extra_model_full_pathextra_modelrU   r+  temp_sdrj   r  s                      @r   executeGGUFLoaderKJ.execute  s   ))+
nn$$&	A 	-84 !//
C
	>"))88DIB ',<,F((11(4(B(B6K[(\%Z%/%6%6%E%EF[%\NK  0 6 6 88(4(B(B?Td(e%#kk99:OP).)>)>)Z)Z[f)g&)-EE#kkCCKRhjlQm{C  AG7|a'&- !HIIIIk"882C8599ZY[C\ 9 
 =!HUVV  1177> / !8!88Y\YpYp  rD  ZEE 567UV#u~~**113LMMEI##**B"#noou~~**113LMMEJ##**B}}U$$Y  	>""11*=B	> ! Z","3"3"B"BCX"YKZs$   L 4L9 %L65L69%M"!M"r   N)r   r   r   r   r   rp  rs  r~   ry  r   r  r  r  r   r   r   r   rT  rT    sh    
 
8::::
 ++/-	 
 
( @% @%r   rT  )flex_attention	BlockMaskc                   :    \ rS rSr\S 5       rSrSrSrSr	S r
Srg	)
NABLA_AttentionKJi*  c                 ^    SSSSS04SSSS	S
.4SSSSS
.4SSSSS
.4SSSSSS.4SSSS.4S.0$ )Nrm   re   r  ri   z!Only used to get the latent shaperm     r,   zTemporal attention window sizer9  rG   zSpatial attention window sizer  ?r   r  r  r  rk   Tz0Most likely required for reasonable memory usagerg   )rj   r  window_timewindow_widthwindow_heightsparsitytorch_compiler   rp   s    r   rr   NABLA_AttentionKJ.INPUT_TYPES+  su    )-P!QR!r!Hh#ij"!Hg$hi#1Ih%ij c#cSW"XY'TFx)yz
 	
 		
r   re   rt   zExperimental node for patching attention mode to use NABLA sparse attention for video models, currently only works with Kadinsky5ru   c                    ^ [         b  [        c  [        S5      eUR                  5       nUS   n	[	        XXTU5      n
[        U
5      mU4S jnU(       a  [        R                  " USSS9nXR                  S   S'   U4$ )	NzScan't import flex_attention from torch.nn.attention, requires newer pytorch versionr  c                    > T" U0 UD6$ rx   r   )r|   r   r1   nabla_attentions      r   attention_override_nabla9NABLA_AttentionKJ.patch.<locals>.attention_override_nablaG  s    "D3F33r   re  T)ri  rl  r   r   )	r  r  r   r   get_sparse_paramsNABLA_AttentionrJ   r  r   )r   rj   r  r  r  r  r  r  r   r  sparse_paramsr  r  s               @r   rt   NABLA_AttentionKJ.patch=  s    !Y%6tuukkm#)']ef)-8	4 ',}}5MTpz~'$ \t!!"789WX|r   r   N)r   r   r   r   r   rr   r   r   r   r   rt   r   r   r   r   r  r  *  s2    

 

 LH VK%Hr   r  c                   *    \ rS rSrS rS rSS jrSrg)r  iS  c                     Xl         g rx   r  )r   r  s     r   r^  NABLA_Attention.__init__T  s    *r   c                 2   UR                   S   S:  d  UR                   S   S:  a  [        XX440 UD6$ U R                  XU R                  S   U R                  S   S9n[	        XX6S9R                  SS5      R                  5       R                  SS	5      nU$ )
NrR  i  sta_maskP)thr)
block_maskr,   r-   r=   )rN   r   	nablaT_v2r  r  r0   
contiguousr  )r   r   r   r   rD   r1   r  r3   s           r   __call__NABLA_Attention.__call__W  s    772;t!3&qQ@@@^^A$*<*<Z*HdN`N`adNe^f
Q1<FFq!LWWYaabdfhi
r   c           	      (   SnUR                   u  pgpX-  n
UR                  XgXU	5      R                  S5      nUR                  XgXU	5      R                  S5      R                  SS5      nX-  n[        R
                  " U[        R                  " U	5      -  SS9nUR                  S5      u  pUR                  S5      nUSU-
  :  R                  5       nUR                  SUR                  S5      5      n[        R                  " UU5      nUR                  S5      R                  [        R                   5      nUR                  SSS9R                  [        R                   5      n["        R$                  " [        R&                  " U5      UUUUS S9$ )	Nr   rR  r=   rS  r,   T)rT  
descending)
BLOCK_SIZEmask_mod)rN   rR   r[  r0   rJ   rU  mathsqrtsortcumsum_r1  gatherargsort
logical_orrZ  rL   int32r  from_kv_blocks
zeros_like)r   r   r   star  r  rK  hrD  Ds1qakarO   valsindscvalsr7   kv_nbkv_indss                       r   r  NABLA_Attention.nablaT_v2^  sK   
WW
a_YYqRQ/44R8YYqRQ/44R8BB2rJgmmC$))A,.B7XXb\
R S %%'{{2t||B/0c* ,,,2$,7::5;;G''(8(8(?%QXeoz~r   r  Nr  )r   r   r   r   r^  r  r  r   r   r   r   r  r  S  s    +@r   r  c                 J   [         R                  " XU/5      R                  5       n[         R                  " SUS[         R                  [
        R                  " 5       S9nUR                  S5      UR                  S5      -
  R                  5       nUS U 2S U 24   R                  5       US U2S U24   R                  5       US U2S U24   R                  5       pn	XS-  :*  n	XS-  :*  n
XS-  :*  nU
R                  S5      UR                  S5      -  R                  XX"5      R                  SS5      R                  5       nU	R                  S5      UR                  S5      -  R                  X X-  X-  5      R                  SS5      nUR                  X-  U-  X-  U-  5      $ )Nr   r,   r   r-   )rJ   Tensoramaxarangeint16r  r  rQ   absr  rR   r0   )rC  HWwTwHwWlrmatsta_tsta_hsta_wsta_hwr  s                 r   fast_sta_nablar  u  s   aAY$$&AQ1EKK8K8K8MNA;;q>AKKN*
/
/
1CBQBFBQBFBQBF E
 1WE1WE1WEooa 5??1#55>>qQJTTUVXYZbbdF??1 0 0 33
<
<Q15!%
P
Z
Z[\^_
`C;;quqy!%!),,r   c           
          U R                   u  pVpxn	Sn
XzS   -  XS   -  XS   -  pn[        XxS-  U	S-  XU5      nUR                  S5      R                  S5      SUUUUSXxU	4SS.	nU$ )	N)r,   r-   r-   r   r,   r-   r   Ttopcdf)	r  
to_fractalr  r  r  r  add_stavisual_shapemethod)rN   r  
unsqueeze_)r2   r  r  r  r  rK  rF  rC  r  r  
patch_sizer  r  s                r   r  r    s    GGMA!J	]	]	] A
 aaa<H''*55a8q	
M r   )IOc                   T    \ rS rSr\S 5       r\R                  4rSr	Sr
SrSrS rSrg	)
StartRecordCUDAMemoryHistoryi  c                 l    S[         R                  4/ SQSSS.4/ SQSSS.4SS/SS	S.4S
SSSSS.4S.0$ )Nrm   )allstateNoner  zcNone: disable, 'state': keep info for allocated memory, 'all': keep history of all alloc/free callsrg   )r  r  allocr  znNone: no tracebacks, 'state': tracebacks for allocated memory, 'alloc': for alloc calls, 'all': for free callspythonzD'python': Python/TorchScript/inductor frames, 'all': also C++ framesrm  i i  i z#Maximum number of entries to record)rh   rq  rr  ri   )inputrN  r  stacksmax_entriesr  ANYrp   s    r   rr   (StartRecordCUDAMemoryHistory.INPUT_TYPES  s     &&4%  Uz  7{  |=5  ^N  @O  P$e,%  MS  /T  U %6$x  eJ  (K   L
 	
r   r  output_pathstartKJNodes/memoryzTHIS NODE ALWAYS RUNS. Starts recording CUDA memory allocation history, can be ended and saved with EndRecordCUDAMemoryHistory. c                 
   [         R                  " 5         [        R                  R	                  [         R
                  " 5       5        [        R                  R                  R                  UUS:w  a  UOS US:w  a  UOS US9  U4$ )Nr  )r  rN  r  r  )r  soft_empty_cacherJ   r   reset_peak_memory_statsr  memory_record_memory_history)r   r  rN  r  r  r  s         r   r  "StartRecordCUDAMemoryHistory.start  sm    


**2+>+>+@A

00#&&0Gd&&0Gd	 	1 	
 vr   r   N)r   r   r   r   r   rr   r  r
  r   r  r   r   r   r  r   r   r   r   r  r    s?    
 	
 	
 FF:L,LHH UK	r   r  c                   V    \ rS rSr\S 5       r\R                  S4rSr	Sr
SrSrS rS	rg
)EndRecordCUDAMemoryHistoryi  c                 8    S[         R                  4SSS0S4S.0$ )Nrm   r  rh   comfy_cuda_memory_historyz\Base path for saving the CUDA memory history file, timestamp and .pt extension will be addedr  r	  rp   s    r   rr   &EndRecordCUDAMemoryHistory.INPUT_TYPES  s5    ffY$y2M&N  Qo  p
 
 	
r   r  r  endr  zRecords CUDA memory allocation history between start and end, saves to a file that can be analyzed here: https://docs.pytorch.org/memory_viz or with VisualizeCUDAMemoryHistory nodec                 :   [         R                  " 5         [        R                  R                  5       R	                  S5      nU U S3n[
        R                  R                  R                  U5        [
        R                  R                  R                  S S9  X4$ )Nz%Y%m%d_%H%M%Sz.pt)rN  )
r  r  datetimenowstrftimerJ   r   r  _dump_snapshotr  )r   r  r  times       r   r  EndRecordCUDAMemoryHistory.end  sy    
  $$&//@$dV3/

((5

000>!!r   r   N)r   r   r   r   r   rr   r  r
  r   r  r   r   r   r  r   r   r   r   r  r    s@    
 
 FFH&L,LHH IK"r   r  )PromptServerc                   B    \ rS rSr\S 5       rSrSrSrSr	Sr
SrS	 rS
rg)VisualizeCUDAMemoryHistoryi  c                     SS0SS0S.$ )Nsnapshot_pathr  	unique_id	UNIQUE_ID)rm   hiddenr   rp   s    r   rr   &VisualizeCUDAMemoryHistory.INPUT_TYPES  s$     \
 [
 	
r   r  )r  	visualizer  zBVisualizes a CUDA memory allocation history file, opens in browserTc                    SS K nSSKJn  SS KnSSKJn  U" 5       n[        US5       nUR                  U5      n	S S S 5        UR                  W	5      n
SUR                  5       R                   S3n[        R                  R                  USU5      n[        R                  " [        R                  R                  U5      SS	9  [        US
SS9 nUR!                  U
5        S S S 5        SU S3nU(       a+  ["        b$   ["        R$                  R'                  UU5        U4$ U4$ ! , (       d  f       N= f! , (       d  f       NZ= f!    U4$ = f)Nr   )_memory_viz)get_output_directoryrbcuda_memory_history_z.htmlmemory_historyT)exist_okwzutf-8)encodingz4http://localhost:8188/api/view?type=output&filename=z&subfolder=memory_history)pickle
torch.cudar.  uuidr   r/  openr   
trace_plotuuid4hexr  r  r  makedirsdirnamewriter#  instancesend_progress_text)r   r'  r(  r6  r.  r8  r/  
output_dirfsnapshothtmlhtml_filenamer  api_urls                 r   r,  $VisualizeCUDAMemoryHistory.visualize  s   *5)+
-&!{{1~H ' %%h/.tzz|/?/?.@Fggll:/?O
BGGOOK04@+sW5GGDM 6 IWpq 1%%88 xwx/ '& 65xs#   D$D5> E $
D25
EEr   N)r   r   r   r   r   rr   r   r  r   r   r   OUTPUT_NODEr,  r   r   r   r   r%  r%    s9    
 
 L#LHHVKKr   r%  c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)ModelMemoryUseReportPatchi  c                     SSS00$ )Nrm   rj   re   r   rp   s    r   rr   %ModelMemoryUseReportPatch.INPUT_TYPES  s    Z
  	r   re   rt   zDAdds callbacks to model to report memory usage during after samplingTr  c                    ^ UR                  5       n[        R                  " 5       mU4S jnU4S jnUR                  [        R
                  U5        UR                  [        R                  U5        U4$ )Nc                 D   > [         R                  R                  T5        g rx   )rJ   r   r  )rj   r   s    r   reset_mem_usage8ModelMemoryUseReportPatch.patch.<locals>.reset_mem_usage"  s    JJ..v6r   c                    > [         R                  R                  T5      S-  n[         R                  R                  T5      S-  n[        R
                  " SUS S35        [        R
                  " SUS S35        g )Ni   @z*Sampling max allocated memory: max_memory=z.3fz GBz+Sampling max reserved memory: max_reserved=)rJ   r   max_memory_allocatedmax_memory_reservedrY   rZ   )rj   
max_memorymax_reservedr   s      r   report_mem_usage9ModelMemoryUseReportPatch.patch.<locals>.report_mem_usage$  sg    88@7JJ ::99&AGKLLLF:s:K3OPLLG,9LCPQr   )r   r  r  rE  r`   rF  rG  )r   rj   r   rP  rW  r   s        @r   rt   ModelMemoryUseReportPatch.patch  s^    kkm$$&	7	R 	  !7!7I  !7!79IJ~r   r   Nr   r   r   r   rK  rK    s4     
 LHXKLHr   rK  c                   <    \ rS rSrS rS\R                  4S jrSrg)MemoryUsageFactorAdjustWrapperi0  c                     Xl         X l        g rx   memory_usage_factororiginal_factor)r   r^  r_  s      r   r^  'MemoryUsageFactorAdjustWrapper.__init__1  s    #6 .r   noise_shapec                    UR                  5       nU R                  UR                  l        [        R                  " SU R                   35         U" Xc/UQ70 UD6n[        R                  " SU R
                   35        U R
                  UR                  l        U$ ! [        R                  " SU R
                   35        U R
                  UR                  l        f = f)Nz'Temporarily set memory usage factor to zGModel memory usage calculated, restoring original memory usage factor: )r   r^  rj   rY   rZ   r_  )r   r  rj   ra  r   r1   r  results           r   r  'MemoryUsageFactorAdjustWrapper.__call__5  s    KKM&*&>&>#>t?W?W>XYZ	?a>t>v>FLLbcgcwcwbxyz*.*>*>AGG' LLbcgcwcwbxyz*.*>*>AGG's   B A Cr]  N)	r   r   r   r   r^  rJ   r  r  r   r   r   r   r[  r[  0  s    /	U\\ 	r   r[  c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)ModelMemoryUsageFactorOverridei@  c                     SSSSSSSS.4S	.0$ )
Nrm   re   r  r  r   r  r  r  )rj   r^  r   rp   s    r   rr   *ModelMemoryUsageFactorOverride.INPUT_TYPESA  s*    $+SQV`e-f#g
  	r   re   rt   z?Overrides the memory usage factor of the model during sampling.Tr  c                    UR                  5       nUR                  R                  n[        R                  " SU 35        [        X$5      nUR                  [        R                  R                  R                  SU5        U4$ )NzOriginal memory usage factor: +memory_usage_factor_adjust_prepare_sampling)r   rj   r^  rY   rZ   r[  r  r   r  r  PREPARE_SAMPLING)r   rj   r^  r   original_memory_usage_factorwrappers         r   rt   $ModelMemoryUsageFactorOverride.patchN  ss    kkm'2'8'8'L'L$56R5STU01Dc((##..??9	

 ~r   r   Nr   r   r   r   rf  rf  @  s4      LHSKLHr   rf  c                    UR                   S   U R                  :  ay  [        R                  " XR                  SS9n/ nU H;  nUR                  [        R                  R                  R                  X5      5        M=     [        R                  " USS9nU$ [        R                  R                  R                  X5      $ Nr,   rS  )
rN   dim_thresholdrJ   r  
num_chunksr  nn
Sequentialforwardr  )r   r2   chunksoutput_chunksr  chunkeds         r   wan_ffn_chunked_forwardry  [  s    wwqzD&&&QQ7E  !4!4!<!<T!IJ ))Mq1xx""**433r   c                   (    \ rS rSrSS jrSS jrSrg)WanffnChunkPatchif  c                     Xl         X l        g rx   rr  rq  r   rr  rq  s      r   r^  WanffnChunkPatch.__init__g      $*r   Nc                 <   ^  U 4S jn[         R                  " X15      $ )Nc                 f   > TR                   U l         TR                  U l        [        U /UQ70 UD6$ rx   )rr  rq  ry  rn  s      r   wrapped_forward1WanffnChunkPatch.__get__.<locals>.wrapped_forwardl  s2    %)__K"(,(:(:K%*;HHHHr   rr  r   rv  rw  r  s   `   r   rx  WanffnChunkPatch.__get__k  s    	I 55r   rq  rr  )   rx   r{  r   r   r   r{  r{  f  s    +6r   r{  c                   P    \ rS rSr\S 5       r\S\R                  4S j5       rSr	g)WanChunkFeedForwardir  c                 .   [         R                  " SSSSS[         R                  R                  S5      [         R                  R                  SSS	S
S	SS9[         R                  R                  SSSSSSS9/[         R                  R                  SS9/S9$ )Nr  zWan Chunk FeedForwardr  zhEXPERIMENTAL AND MAY CHANGE THE MODEL OUTPUT!! Chunks feedforward activations to reduce peak VRAM usage.Trj   rv  r-   r,   rc   zUNumber of chunks to split the feedforward activations into to reduce peak VRAM usage.rp  rq  r  rn  i @     z2Dimension threshold above which to apply chunking.display_namerb  r  rc  rd  re  rf  rg  )r   rh  rl  rj  Intrm  rn  s    r   rp  !WanChunkFeedForward.define_schemat  s    yy)0" C w'XqaSq  Sj  k_d%VY  dX  Y W5
 	
r   returnc                 n   US:X  a  [         R                  " U5      $ UR                  5       nUR                  S5      n[	        UR
                  5       HJ  u  pg[        X#5      R                  UR                  UR                  5      nUR                  SU S3U5        ML     [         R                  " U5      $ )Nr,   r}  r  z.ffn.forward)r   r  r   r  r  r  r{  rx  ffnr  r  )	rn  rj   rv  rq  r   r}  r  r  patched_ffns	            r   r  WanChunkFeedForward.execute  s    Q;==''kkm%667HI#O$:$:;JC*6AII%))UZUdUdeK((+B3%|)TVab < }}[))r   r   N
r   r   r   r   r   rp  r   r  r  r   r   r   r   r  r  r  s3    
 
" *bmm * *r   r  )FeedForward)r  c                 B   UR                   S   U R                  :  ah  U R                  S:  aX  [        R                  " XR                  SS9 Vs/ s H  n[
        R                  " X5      PM     nn[        R                  " USS9$ [
        R                  " X5      $ s  snf rp  )rN   kj_dim_thresholdkj_num_chunksrJ   r  _Ideogram4FeedForwardru  r  )r   r2   cr3   s       r   ideogram4_ffn_chunked_forwardr    s    wwqzD)))d.@.@1.D?D{{1N`N`fg?hi?h!$,,T5?hiyy!$$ ((11 js    Bc                   $    \ rS rSrS rSS jrSrg)Ideogram4FFNChunkPatchi  c                     Xl         X l        g rx   r}  r~  s      r   r^  Ideogram4FFNChunkPatch.__init__  r  r   Nc                 <   ^  U 4S jn[         R                  " X15      $ )Nc                 f   > TR                   U l        TR                  U l        [	        U /UQ70 UD6$ rx   )rr  r  rq  r  r  rn  s      r   r  7Ideogram4FFNChunkPatch.__get__.<locals>.wrapped_forward  s2    (,K%+/+=+=K(0NtNvNNr   rr  r  s   `   r   rx  Ideogram4FFNChunkPatch.__get__  s    	O 55r   r  rx   r{  r   r   r   r  r    s    +6r   r  c                    US   R                  U R                  5      nUS   R                  U R                  5      nUS   R                  U R                  5      nX-  nUR                  S   S-  nUSS U24   R                  U SUS 24   U5        USUS 24   R                  U SS U24   U5        X-  nUR                  S   S-  n	USS U	24   R                  USU	S 24   U5        USU	S 24   R                  USS U	24   U5        Xh4$ )Nr   r,   r-   r=   .)rL   rI   rN   addcmul_)
xqxk	freqs_ciscossinnsinq_embedqsk_embedkss
             r   _ideogram4_apply_rope_lowpr    s   
A,//"((
#C
A,//"((
#CQ<??288$DhG	r	a	BC"Hr#rs(|T2CHr#ss(|S1hG	r	a	BC"Hr#rs(|T2CHr#ss(|S1r   c           
         UR                   u  pVnU R                  U5      R                  XVSU R                  U R                  5      R                  SS9u  pn
U R                  U5      R                  SS5      nU R                  U	5      R                  SS5      n	U
R                  SS5      n
[        XU5      u  p[        XXR                  USUS9nU R                  U5      $ )NrG   r-   rS  r,   T)r8   r   )rN   qkvr?   r4  r5  unbindr-  r0   r.  r  _ideogram4_attnr<  )r   r2   r   r  r   rI  seq_lenrU   r   r   r   r3   s               r   %ideogram4_attention_lowp_rope_forwardr    s    WWJhhqkzAt~~t}}U\\ab\cGA!A  A&AA  A&A	AqA%aI6DA
!>>94ex
yC66#;r   c                       \ rS rSrSS jrSrg)Ideogram4RopePatchi  Nc                 8    [         R                  " [        U5      $ rx   )rs  rt  r  )r   rv  rw  s      r   rx  Ideogram4RopePatch.__get__  s     EsKKr   r   rx   )r   r   r   r   rx  r   r   r   r   r  r    s    Lr   r  c                   P    \ rS rSr\S 5       r\S\R                  4S j5       rSr	g)Ideogram4OptimizationsKJi  c                    [         R                  " SSSSS[         R                  R                  S5      [         R                  R                  SSSS	9[         R
                  R                  S
SSSSSS9[         R
                  R                  SSSSSSS9[         R                  R                  SSSS	9/[         R                  R                  SS9/S9$ )Nr  zIdeogram4 Optimizations KJru   a  EXPERIMENTAL AND MAY CHANGE THE MODEL OUTPUT!! Reduces peak VRAM of the Ideogram4 forward. chunk_ffn splits the SwiGLU activations over the token dim; bf16_rope applies RoPE in the model dtype instead of upcasting to fp32. Both target the two largest transient tensors in the block.Trj   	chunk_ffnz_Chunk the feedforward activations over the sequence dim to cap the (B, L, hidden) intermediate.rg   
ffn_chunksr-   r,   r   zjNumber of chunks to split the feedforward sequence into. More chunks = lower peak, slightly more overhead.rp  ffn_seq_thresholdrn  r  i   z[Only chunk when the token sequence length exceeds this (skips chunking for tiny sequences).	bf16_ropeznApply RoPE in the input dtype instead of fp32. ~Halves RoPE activation memory; matches the HF reference dtype.r  r  )r   rh  rl  rj  rk  r  rm  r  s    r   rp  &Ideogram4OptimizationsKJ.define_schema  s    yy.5+z !w'

  d *K ! L\1!! &R  S0$CUY\ &C  D

  d *Z ! [
 W5'
 	
r   r  c                    U(       d  U(       d  [         R                  " U5      $ UR                  5       nUR                  S5      n[	        USS 5      nU(       a(  [        US   S5      (       a  [        US   S5      (       d,  [        R                  " S5        [         R                  " U5      $ [        U5       H  u  pU(       aU  US:  aO  [        X45      R                  U
R                  U
R                  R                  5      nUR                  SU	 S	3U5        U(       d  Mj  [        5       R                  U
R                  U
R                  R                  5      nUR                  SU	 S
3U5        M     [         R                  " U5      $ )Nr}  r  r   feed_forwardr;  zIdeogram4OptimizationsKJ: model does not look like Ideogram4 (expected diffusion_model.layers[*].feed_forward/.attention); returning model unchanged.r,   zdiffusion_model.layers.z.feed_forward.forwardz.attention.forward)r   r  r   r  r  r   rY   r  r  r  rx  r  r  r  r  r;  )rn  rj   r  r  r  r  r  r}  r  r  r  r  r  s                r   r   Ideogram4OptimizationsKJ.execute  sP   ==''KKM,,->?(D9WVAY??wvVWyZeGfGfOO w x==''#F+JCZ!^4ZS[[\a\n\npu  qC  qC  qM  qM  N""%<SEAV#WYdey13;;EOOU__MfMfg""%<SEAS#TVbc , }}Qr   r   Nr  r   r   r   r  r    s6    
 
2  TVTaTa    r   r  )KSAMPLER)to_dc                    Uc  0 OUnUR                  UR                  S   /5      nUb9  [        R                  " [        R                  " S5      5      R                  U5      n[        [        U5      S-
  USS9n[        [        U5      S-
  5       GH2  nUR                  US5      nUS:  nU(       a  US-   OSnX/   X/S-      nnS nS nS nS nS nSn[        U5       GH  nUS:  a-  UR                  SU S[        U5      S-
   S	US-    SU S
3	5        U(       a  Un  GOy[        R                  " UR                  [        R                  " S5      WS9R                  U5      nUS:X  a  UOSU-
  U-  UU-  -   nUS:  a  UnU " UX/   U-  40 UD6nUb  U" XX/   X/   US.5        [        UUU5      nUUU-
  U-  -   n UR                  S:X  a  U
b  [        R                   " U
SS  5      n!US S 2S S 2S U!24   R#                  UR                  S   /[%        U
5      SS  -   5      n"U S S 2S S 2S U!24   R#                  UR                  S   /[%        U
5      SS  -   5      n#US S 2S S 2U!S 24   n$U S S 2S S 2U!S 24   n%U	(       a0  [        R&                  " SU"R                   SU$R                   35        OUn"U n#S n$S n%U(       GaR  UGbN  U"U-
  n&[        R(                  " [        R*                  " U&S-  SS95      U"R                  S   -  n'U'U:  n(U	(       a  [        R&                  " SU S[        U5      S-
   SUS-    SU S3	5        [        R&                  " SU'R-                  5       S SU'R/                  5       S SU 35        [        R&                  " SU(R+                  5        SU(R1                  5        SU(R+                  5       U(R1                  5       -  S 35        Ub  U(U-  n(U(R+                  5       U(R1                  5       -  U:  a(  SnU	(       a  [        R&                  " U SU SU S35        U(R3                  5       R5                  S5      n)U)U-  SU)-
  U#-  -   n#U)U-  SU)-
  U"-  -   n"U%b  U R7                  5       n U#R#                  U#R                  S   U R                  S   S/5      U S S 2S S 2S W!24'   UR7                  5       n*U"R#                  U"R                  S   UR                  S   S/5      U*S S 2S S 2S U!24'   OU#n U"n*U(nU"nU*nU#nU nOaU(       aZ  U%bK  UR7                  5       n*U"R#                  U"R                  S   UR                  S   S/5      U*S S 2S S 2S W!24'   OU"n*S nU"nU*nU#nU nU(       a  UUS-
  :X  a  UnGM  U(       a  GM  U nGM     UR9                  S5        US:X  d  GM!  UR                  S5        GM5     UR;                  5         U$ )Nr   cpur,   Sampling)totalr]   descFzStep /z
 (substep ))r   	generatorr  )r2   r  sigma	sigma_hatdenoisedrG   zVideo shape: z, Audio shape: r-   rS  z	 substep :zUncertainty: min z.4fz, max z, threshold zCertain pixels: z = Tz: Certain region is more than z, we are certainr=   )new_onesrN   rJ   	Generatorr   manual_seedr   r  r  rH   set_descriptionrandnrL   r  rP   r  prodrR   listr?  r  rZ  rq  rr  numelfloatrQ   r   r!  close)+rj   r2   rF  stochastic_step_mapcertain_percentageuncertainty_thresholdr&  callbackr]   verbosevideo_shapeseedsigma_inr  pbarr  current_num_anneal_stepsuse_stochasticr  r  
sigma_nextprev_certain_maskprev_denoisedprev_denoised_fullprev_x_nextprev_x_next_video
is_certainiinoisex_inr  r/  x_nextcutdenoised_videox_next_videodenoised_audiox_next_audiodiffuncertaintycertain_maskcertain_mask_floatdenoised_fulls+                                              r   sample_selfrefinevideor  	  s{   !)zJzz1771:,'HOOELL$78DDTJ	c&kAowZHD3v;?# $7#:#:1a#@ 1A5,:$q("Iv!e}z ! 
(B1u$$uQCqVQz"Q$qQRPSST%UV KKU0CyY\\]^_Ea1cEk5G%G%RW-%WDAvT69x#7F:FH#q6969bjkl Qx(A*u,11F vv{{6iiAB0!)!Q*!5!=!=x~~a?P>QTXYdTefgfhTi>i!j%aDSDj1998>>!;L:MPTU`PabcbdPe:ef!)!Q*!5%aCDj1JJ~/C/C.DOTbThThSijk!)%!%# -";%5#jj419!)DEH\H\]^H__*-BBJJqc3v;q=/2a4&!ANOJJ!2;??3DS2IP[P_P_PabeOffr  tI  sJ   K  LJJ!1,2B2B2D1EQ|GYGYG[F\\_`l`p`p`rs  tF  tF  tH  aH  IL  `M   N  O %0#/2C#CL  ##%(:(:(<<?QQ!%J

bT+C*DDbcubv  wG  $H  I &2%7%7%9%C%CA%F"14EEOaIaeqHqq!3m!CsM_G_cqFq!q  +#\\^F)5)=)=|?Q?QRS?TV\VbVbcdVegi>j)kF1a#:&$,NN$4M0>0F0FH\H\]^H_aiaoaopqartvGw0xM!Q*- *F$2M$0! .%2"$0!$+$,NN$4M0>0F0FH\H\]^H_aiaoaopqartvGw0xM!Q*-$2M$(! .%2"$0!$ "A+#^K N 	A6  ,s $t 	JJLHr   c                   T    \ rS rSr\S 5       r\SS\R                  4S jj5       rSr	g)SamplerSelfRefineVideoi  c                    SS/n/ n/ n[        SS5       H  nXS-
     u  pVnUR                  [        R                  R	                  SU 3USSSSU 3S	9[        R                  R	                  S
U 3USSSSU 3S	9[        R                  R	                  SU 3USSSSU 3S	9/5        M     UR                  [        R                  R                  SUS95        / n[        SS5       H  nXS-
     u  pVnUR                  [        R                  R	                  SU 3USSSSU 3S	9[        R                  R	                  S
U 3USSSSU 3S	9[        R                  R	                  SU 3USSSSU 3S	9/5        M     UR                  [        R                  R                  SUS95        UR                  [        R                  R                  S[        R                  R	                  SSSSS9/S95        [        R                  " SSSS[        R                  R	                  SUSS9[        R                  R	                  SS S!S"S#S$S%S&9[        R                  R	                  S'S(S!S"S)S$S*S&9[        R                  R	                  S+S$S,S-9[        R                  R	                  S.SS/S09[        R                  R	                  S1SSS2SS3S	9/[        R                  R                  5       /S49$ )5N)r-      rG   )      r,   r,   rG   
start_stepr   i  zStart step for range rp  end_stepzEnd step for range steps_rc   zNumber of P&P steps for range z2 ranges)r  rf  r-   z1 rangefrom_stringstochastic_planz2-5:3,6-14:1Tz=Format: 'start-end:steps,start-end:steps' e.g. '2-5:3,6-14:1')rh   r  ri   r  zKJNodes/samplerszmAttempt to implement https://github.com/agwmon/self-refine-video, for testing only, MAY NOT WORK AS INTENDED.
input_modezHow to configure the step plan)r  ri   r  +?r   r  r  FzYPercentage of certain pixels to consider the frame as certain and skip further refinement)rh   rq  rr  rs  roundri   r  r  r  z6Threshold of uncertainty to consider a pixel uncertainr  z&Enable verbose logging during samplingrg   r  zHOptional latent input to get input shape for LTX2 audio/video separation)rn   ri   r  l    zSeed for stochastic samplingra  )r  extendr   r  rj  r  DynamicComboOptionStringrh  Floatrk  LatentSamplerrm  )	rn  default_rangesr  range_inputs_2r  start_defaultend_defaultsteps_defaultrange_inputs_1s	            r   rp  $SamplerSelfRefineVideo.define_schema  sn    

  q!A8F1u8M5M!!z!-}!QT[\h}~  ~A  gB  Cxs^[aSWXdwxywzb{|vaS\=aSWX  eC  DE  CF  cG  H#   	r--*^-TU q!A8F1u8M5M!!z!-}!QT[\h}~  ~A  gB  Cxs^[aSWXdwxywzb{|vaS\=aSWX  eC  DE  CF  cG  H#   	r--)N-ST 	r--		%*"[	    . 

 
	 yy,' H %%lGMm%n3UQT[`hm  xS  T6RU\`hm  xp  q

  ECk l		4  BL  MVQA;MTU_}~ ZZ&&()
 	
r   Nr  c           
         S nUb  US   R                   n[        UR                  5        Vs/ s H  oR                  S5      (       d  M  UPM     sn5      n	0 n
SU;   a  US   nUR	                  S5      nU H  nUR                  5       nU(       d  M   UR	                  S5      u  pUR	                  S5      u  nn[        U5      [        U5      [        U5      nnn[        UUS-   5       H  nUU
U'   M
     M     OUR                  5        Vs/ s H  oR                  S5      (       d  M  UPM     n	nU	 H{  nUR                  SS
5      nUR                  SU 35      nUR                  SU 35      nUR                  SU 35      nUc  MV  Uc  M[  Uc  M`  [        UUS-   5       H  nUU
U'   M
     M}     [        [        U
UUUUUS.5      n[        R                  " U5      $ s  snf ! [         a    [        SU S	35      ef = fs  snf )Nr  r	  r  r,  r  -r,   z$Invalid format in stochastic_plan: 'z%'. Expected format: 'start-end:steps'r  r
  r  )r  r  r  r  r  r  )rN   sortedr  
startswithr0  r2  r1  r  r  replacerH   r  r  r   r  )rn  r  r  r  r  r  r  r  r   
range_keysr  plan_strranges
range_spec
range_part
steps_partr  r  stepsr  	start_keyr  samplers                          r   r  SamplerSelfRefineVideo.execute  s    +11K
(9X(91\\,=WQ(9XY
 
*!"34H^^C(F$
'--/
!-7-=-=c-B*J!+!1!1#!6JE3(+E
CHc*o3E$UC!G438+C0  5 % &0__%6U%6,,|:T!%6JU'	%%lB7"A3'78 nnxs^4"s|4$U=N$UC!G438+C0  5 ( 1#6"4%:&4
  }}W%%M Y  " $'KJ<W|%}~~ Vs$   G 	G A#G%H*H%G?r   rx   r  r   r   r   r  r    s7    8
 8
t +&kmkxkx +& +&r   r  )g_}<ݭg	IG?g73pʌ?)g bGgX+ڿguʣaq?)gqR8gYh4?gI))gr#Dg($ɇg聏)g)ʥ?g1Tm7?gA?)gfegk) gx@)gCϿgOοgF%u)gu7Ou?g^L3?g23/ݷ?)g#F-Ŀg^Ϳg5&ļ)g%gTr3܀?gPf?rI   c                   >    \ rS rSr\S 5       rSrSrSrSr	Sr
S rS	rg
)PiDColorBiasCorrectioni	  c           	      .    SSSSSSSSS	.4S
/S
SS.4S.0$ )Nrm   re   r  r  g      4g      4@r  zdCorrection strength. 1.0 = full predicted bias subtracted. <1 = milder, >1 = stronger, 0 = disabled.rp  flux2ue   Calibrated PiD backbone (currently only flux2 — others use the same model but coefficients differ).rg   )rj   strengthbackboner   rp   s    r   rr   "PiDColorBiasCorrection.INPUT_TYPES	  sN     c%VZ /U#V W! 1X%Y Z	
  	r   re   rt   ru   Ta   PiD 4-step decoder color/brightness drift corrector. Subtracts a per-channel bias from x0_pred at the first sampling step, using a small linear model calibrated against the model's systematic drift (model tends to brighten dark scenes and add a blue cast).c                    ^^ TS:X  d  US:w  a  U4$ [         mUU4S jnUR                  5       nUR                  U5        U4$ )Nr   r2  c                 @  > U S   n U S   S   S   nU R                   " SU R                   " S5      5      nUb;  [        R                  " UR                  5       US   5      R	                  5       (       d  U$  T
R                  UR                  UR                  S	9nUR                  S
S9nUR                  S
S9n[        R                  " SUR                  UR                  S9n[        R                  " US   US   US   US   US   US   US   US   -  US   US   -  US   US   -  U/
5      nX-  n	UTU	R                  SSSS5      -  -
  $ ! [
        [        4 a?    U R                   " S5      nUb"  UR                  5       R	                  5       S:  a  Us $  GN%f = f)Nr  r   r   rA  r  r@  r   gffffff?r.  )r   r-   rG   rS  r  )r   rI   r,   r-   rG   )rH   rJ   iscloserr  rE  r  AttributeErrorrL   r   rI   r[  stdtensorstackr?   )r   r  rF  r  coefrgb_mrgb_sonefeatsbiascoef_cpur3  s             r   pid_bias_post_cfg7PiDColorBiasCorrection.patch.<locals>.pid_bias_post_cfg(	  s   J'H
$o./DEoV$((:*>?=eiik6!9(M(R(R(T(T#O )U ;;xhnn;EDMMiM0ELLYL/E,,s8??(..QCKKa%(E!Ha%(E!Ha58#U1Xa%8%(U1X:M	! E <Dh1aA)>>>># n- $)=EIIK$4$4$6$=#O %>$s   A-E A	FF)PID_BIAS_COEF_FLUX2r   #set_model_sampler_post_cfg_function)r   rj   r3  r4  rD  r  rC  s     `   @r   rt   PiDColorBiasCorrection.patch#	  sE    s?h'18O&	?: KKM	--.?@tr   r   N)r   r   r   r   r   rr   r   r   r   r   r   rt   r   r   r   r   r0  r0  	  s:      LH%HL	E $r   r0  r   )NNFr  )NNFN)r}  )   )rG   rG   rG   r  )r  r  NNNFNN)r  r  rY   rJ   r  r  r  r   r   comfy.model_managementmodel_managementr  comfy.cli_argsr   comfy.ldm.modules.attentionr   r   r   comfy.utilsr   comfy.sd	comfy.opscomfy_api.latestr   v3_availabler   r  ro   r_   comfy.patcher_extensionr`   rb   rM   r   r   r   r  r&  r-  r9  rJ  rX  r  r  r  r  r  weakref_kj_weakrefr  WeakKeyDictionaryr  r  r  r  r  r  r	  r  comfy.ldm.flux.mathr*  r?  einopsr@  r6  rG  rs  re  r~  comfy.ldm.lightricks.modelr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r7  	ComfyNoderT  !torch.nn.attention.flex_attentionr  r  r  r  r  r  comfy.comfy_types.node_typingr  r  r  serverr#  r%  rK  r[  rf  ry  r{  r  comfy.ldm.lumina.modelr  r  r  r  r  r  r  r  r  r  comfy.samplersr  comfy.k_diffusion.samplingr  r  r  r;  rK   rF  r0  r   r   r   <module>ra     s   	 
        #  Y Y   k#L
 UEP 0 D "'5== 8v! !HF  F R 0,E EN$ $N $2 2j, ,^ w&R
 !#557 A	AF FR' 'R> >@ @ 4 +LN B T04 8 8& &P;% N (,$4d`b *B8 8! !F FH R
 WY "
D< EG &P Z\ D8 82/ /bK KZ4 4lI%2<< I%VK
& &R @  @D-"0 -   D" ".#1 1h >   6	4
6 
6 *",,  *H H U2
6 
6$ ^` L L
2 r|| 2 j $ +DLh&R\\ h&b ll%%%%%%%%%%$ 
 9 9sG  kLOOijkT  
  % $%D  NIh  LsZ   
L$ "M )M (M% )M6 $MMMM
M"!M"%
M32M36NN