
    3jAH                     H   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SKJ
r  S SKJrJr  S SKJr  S SKJr  S SKJrJr  \R,                  " S5      rSrS	rS
\S\4S jr " S S\R8                  5      r " S S\R8                  5      r " S S\R8                  5      r " S S\R8                  5      r  " S S5      r! " S S\R8                  5      r" " S S\RF                  RH                  5      r% " S S\R8                  5      r& " S S\5      r'S\'4S jr(g)     N)model_trange)ComfyExtensionio)
raft_large)override)RaftOpticalFlowget_noise_from_videoOPTICAL_FLOW      lengthreturnc                     U S-
  [         -  S-   nU[        -  S:X  a  U $ [        [        U[        -  [        -  5      nUS-
  [         -  S-   $ )a  Round ``length`` down to a value that produces an even latent_t.

VOID / CogVideoX-Fun-V1.5 uses patch_size_t=2, so the VAE-encoded latent
must have an even temporal dimension. If latent_t is odd, the transformer
pad_to_patch_size circular-wraps an extra latent frame onto the end; after
the post-transformer crop the last real latent frame has been influenced
by the wrapped phantom frame, producing visible jitter and "disappearing"
subjects near the end of the decoded video. Rounding down fixes this.
   r   )TEMPORAL_COMPRESSIONPATCH_SIZE_Tmax)r   latent_ttarget_latent_ts      7/home/wildlama/comfy/ComfyUI/comfy_extras/nodes_void.py_valid_void_lengthr      sX     ! 449H,!# ,\)A\(QROa#77!;;    c                   T    \ rS rSrSr\S 5       r\S\R                  4S j5       r	Sr
g)OpticalFlowLoader/   u  Load an optical flow model from ``models/optical_flow/``.

Only torchvision's RAFT-large format is recognized today (the model used
by VOIDWarpedNoise).  The checkpoint must be placed under
``models/optical_flow/`` — ComfyUI never downloads optical-flow weights
at runtime.
c                     [         R                  " SSS[         R                  R                  S[        R
                  " S5      SS9/[        R                  5       /S9$ )	Nr   zLoad Optical Flow Modelzmodel/loaders
model_nameoptical_flowzOptical flow model to load.  Files must be placed in the 'optical_flow' folder.  Today only torchvision's raft_large.pth is supported.)optionstooltipnode_iddisplay_namecategoryinputsoutputs)r   SchemaComboInputfolder_pathsget_filename_listOpticalFlowOutputclss    r   define_schemaOpticalFlowLoader.define_schema8   s_    yy'2$ (::>J7	  
 ""$
 	
r   r   c                    [         R                  " SU5      n[        R                  R	                  USS9n[        S U 5       5      =(       a+    [        S U 5       5      =(       a    [        S U 5       5      nU(       d  [        S5      e[        S SS	9nUR                  U5        UR                  5       R                  [        R                  5        [        R                  R                  U[        R                  R!                  5       [        R                  R#                  5       S
9n[$        R&                  " U5      $ )Nr   T)	safe_loadc              3   B   #    U  H  oR                  S 5      v   M     g7f)zfeature_encoder.N
startswith.0ks     r   	<genexpr>,OpticalFlowLoader.execute.<locals>.<genexpr>U   s     ="Q/00"   c              3   B   #    U  H  oR                  S 5      v   M     g7f)zcontext_encoder.Nr5   r7   s     r   r:   r;   V   s     AbLL!344br<   c              3   B   #    U  H  oR                  S 5      v   M     g7f)zupdate_block.Nr5   r7   s     r   r:   r;   W   s     >2aLL112r<   zUnrecognized optical flow model format: expected a torchvision RAFT-large state dict with 'feature_encoder.', 'context_encoder.' and 'update_block.' prefixes.F)weightsprogress)load_deviceoffload_device)r*   get_full_path_or_raisecomfyutilsload_torch_fileany
ValueErrorr   load_state_dictevaltotorchfloat32model_patcherModelPatchermodel_managementget_torch_deviceunet_offload_devicer   
NodeOutput)r/   r   
model_pathsdhas_raft_keysmodelpatchers          r   executeOpticalFlowLoader.executeN   s    "88T
[[((t(D ="== ?AbAA?>2>> 	
 0  4%8b!

&%%22..??A 11EEG 3 

 }}W%%r    N__name__
__module____qualname____firstlineno____doc__classmethodr0   r   rS   rY   __static_attributes__r[   r   r   r   r   /   s:     
 
* &BMM & &r   r   c                   X    \ rS rSrSr\S 5       r\SS\R                  4S jj5       r	Sr
g)	VOIDQuadmaskPreprocessl   a  Preprocess a quadmask video for VOID inpainting.

Quantizes mask values to four semantic levels, inverts, and normalizes:
  0   -> primary object to remove
  63  -> overlap of primary + affected
  127 -> affected region (interactions)
  255 -> background (keep)

After inversion and normalization, the output mask has values in [0, 1]
with four discrete levels: 1.0 (remove), ~0.75, ~0.50, 0.0 (keep).
c                     [         R                  " SSS[         R                  R                  S5      [         R                  R                  SSSSSS	S
9/[         R                  R                  SS9/S9$ )Nre   zVOID Quadmask Preprocessorz
image/maskmaskdilate_widthr   2   r   z=Dilation radius for the primary mask region (0 = no dilation)defaultminr   stepr    quadmaskr#   r!   )r   r'   Maskr)   Intr-   r.   s    r   r0   $VOIDQuadmaskPreprocess.define_schemay   sm    yy,5!f%^QA2A%d  f J7
 	
r   r   c                    UR                  5       nUR                  5       S::  a  US-  nUS:  a  UR                  S:  a  US:  R                  5       nUS-  S-   nUR                  S:X  a  UR	                  S5      n[
        R                  R                  R                  XESUS9nUR                  S	:X  a  UR                  S5      n[
        R                  " US
:  [
        R                  " U5      U5      n[
        R                  " US:*  [
        R                  " U5      U5      n[
        R                  " US:  US:*  -  [
        R                  " US5      U5      n[
        R                  " US:  US:*  -  [
        R                  " US5      U5      n[
        R                  " US:  [
        R                  " US5      U5      nSU-
  S-  n[        R                  " U5      $ )N      ?g     o@r         r   r   )kernel_sizestridepaddingr         ?   _   ?            )cloner   ndimfloat	unsqueezerL   nn
functional
max_pool2dsqueezewhere
zeros_like	full_liker   rS   )r/   rh   ri   mbinaryrx   dilateds          r   rY   VOIDQuadmaskPreprocess.execute   s   JJL557c>E	A!!#g__&F&*Q.K{{a))!,hh))44< 5 G ||q !//!,GcM5+;+;A+>BAKKR!1!1!!4a8KKRAG,eooa.DaHKKRAH-uq#/FJKKCC!8!<QY%}}Qr   r[   N)r   r\   r[   r   r   re   re   l   s:    
 
 
  bmm    r   re   c                   T    \ rS rSrSr\S 5       r\S\R                  4S j5       r	Sr
g)VOIDInpaintConditioning   a  Build VOID inpainting conditioning for CogVideoX.

Encodes the processed quadmask and masked source video through the VAE,
producing a 32-channel concat conditioning (16ch mask + 16ch masked video)
that gets concatenated with the 16ch noise latent by the model.
c                 T   [         R                  " SS[         R                  R                  S5      [         R                  R                  S5      [         R                  R                  S5      [         R
                  R                  SSS9[         R                  R                  S	S
S9[         R                  R                  SSS[        R                  SS9[         R                  R                  SSS[        R                  SS9[         R                  R                  SSS[        R                  SSS9[         R                  R                  SSSSS9/	[         R                  R                  SS9[         R                  R                  SS9[         R                  R                  SS9/S9$ )Nr   zmodel/conditioning/voidpositivenegativevaevideoz Source video frames [T, H, W, 3]r    ro   z;Preprocessed quadmask from VOIDQuadmaskPreprocess [T, H, W]width        rl   rm   r   rn   height  r   -   r   u   Number of pixel frames to process. For CogVideoX-Fun-V1.5 (patch_size_t=2), latent_t must be even — lengths that produce odd latent_t are rounded down (e.g. 49 → 45).rk   
batch_size@   rl   rm   r   rp   latentr"   r$   r%   r&   )r   r'   Conditioningr)   VaeImagerq   rr   nodesMAX_RESOLUTIONr-   Latentr.   s    r   r0   %VOIDInpaintConditioning.define_schema   sK   yy-.%%j1%%j1U#w0RSj2opWcru?S?SZ[\Xs@T@T[\]Xrqe>R>RYZ&_  ` \1!D &&J&?&&J&?		  h 7#
 	
r   r   c
                    [        U5      n
X:w  a  [        R                  " SX5        U
nUS-
  [        -  S-   nUS-  nUS-  nUS U n[        R
                  R                  UR                  SS5      XgSS5      R                  SS5      nUS U nUR                  S:X  a  UR                  S5      n[        R
                  R                  UR                  SS5      XgSS5      R                  SS5      nUR                  S:X  a$  UR                  S   S:X  a  UR                  S5      nUnUR                  S:X  a$  UR                  S5      R                  SSSS5      nOUnS	U-
  nUS S 2S S 2S S 2S S24   S	U-
  -  nUR                  U5      nUR                  U5      nS
 nU" UU5      nU" UU5      n[        R                  " UU/SS9n[         R"                  " USU05      n[         R"                  " USU05      n[        R$                  " U	SXU/[        R&                  R)                  5       S9n[*        R,                  " XSU05      $ )NzVOIDInpaintConditioning: rounding length %d down to %d so that latent_t is even (required by CogVideoX-Fun-V1.5 patch_size_t=2). Using odd latent_t causes the last frame to be corrupted by circular padding.r   r   bilinearcenterrv   r   ru   c           
          U R                   S   U:  a  U S S 2S S 2S U24   $ U R                   S   U:  aF  XR                   S   -
  n[        R                  " X S S 2S S 2SS 24   R                  SSUSS5      /SS9$ U $ )Nr   r   r   dim)shaperL   catrepeat)lattarget_tpads      r   _match_temporal8VOIDInpaintConditioning.execute.<locals>._match_temporal   s    yy|h&1a(?++1(1-yy#1a9~'<'<Q31'M!NTUVVJr   r   concat_latent_imager   devicesamples)r   loggingwarningr   rD   rE   common_upscalemovedimr   r   r   r   expandencoderL   r   node_helpersconditioning_set_valueszerosrP   intermediate_devicer   rS   )r/   r   r   r   r   ro   r   r   r   r   adjusted_lengthr   latent_hlatent_wvidqmmask_conditionmask_condition_3chinverted_mask_3chmasked_videomask_latentsmasked_video_latentsr   inpaint_latentsnoise_latents                            r   rY   VOIDInpaintConditioning.execute   s\    -V4$OO$ &,	 %FaZ$88A=Q;A:GVnkk((KKAz8

'!R. 	 gv77a<b!B[[''JJr1uj(

'!R. 	 77a<BHHRLA-BB!#!/!9!9"!=!D!DRRQR!S!/"441aBQB;'31C+CDzz"34"zz,7	 '|X>./CXN))\3G$HaP  77,o>
  77,o>
 {{X:))==?

 }}X)\1JKKr   r[   Nr\   r[   r   r   r   r      sA     
 
2 KL68mmKL KLr   r   c                   T    \ rS rSrSr\S 5       r\S\R                  4S j5       r	Sr
g)VOIDWarpedNoisei  a  Generate optical-flow warped noise for VOID Pass 2 refinement.

Takes the Pass 1 output video and produces temporally-correlated noise
by warping Gaussian noise along optical flow vectors. This noise is used
as the initial latent for Pass 2, resulting in better temporal consistency.
c                    [         R                  " SS[        R                  SSS9[         R                  R                  SSS9[         R
                  R                  SS	S
[        R                  SS9[         R
                  R                  SSS
[        R                  SS9[         R
                  R                  SSS[        R                  SSS9[         R
                  R                  SSSSS9/[         R                  R                  SS9/S9$ )Nr   model/latent/voidr   z7Optical flow model from OpticalFlowLoader (RAFT-large).r   r   z'Pass 1 output video frames [T, H, W, 3]r   r   r   r   r   r   r   r   r   r   uh   Number of pixel frames. Rounded down to make latent_t even (patch_size_t=2 requirement), e.g. 49 → 45.rk   r   r   r   warped_noiserp   r   )
r   r'   r,   r)   r   rr   r   r   r   r-   r.   s    r   r0   VOIDWarpedNoise.define_schema  s    yy%(!!"U "  w0YZWcru?S?SZ[\Xs@T@T[\]Xrqe>R>RYZ&Z  [ \1!D 		  n =!
 	
r   r   c                    [        U5      nXu:w  a  [        R                  " SXW5        UnUS-
  [        -  S-   nUS-  n	US-  n
[        R
                  R                  5       n[        R
                  R                  U5        [        UR                  US9nUS U R                  U5      n[        R                  R                  UR                  SS5      X4SS5      R                  SS5      nUR                  SS5      S	-  R                  [        R                   5      nS
nSnSn[#        UUSUU[%        UU-  5      U-  US9nUR&                  S   U:w  a:  [        R(                  " SUR&                  S   S-
  UUS9R+                  5       nUU   nUR&                  S   U	:w  d  UR&                  S   U
:w  aS  UR-                  SSSS5      n[        R.                  R0                  R3                  UX4SSS9nUR-                  SSSS5      nUR-                  SSSS5      R5                  S5      nUS:  a  UR7                  USSSS5      nUR                  [        R
                  R9                  5       5      n[:        R<                  " SU05      $ )NzxVOIDWarpedNoise: rounding length %d down to %d so that latent_t is even (required by CogVideoX-Fun-V1.5 patch_size_t=2).r   r   r   r   r   r   r   r   r{   r   )noise_channelsresize_framesresize_flowdownscale_factorr   r   rv   F)sizemodealign_cornersr   )r   r   r   r   rD   rP   rQ   load_model_gpur   rW   rK   rE   r   r   clamprL   uint8r	   roundr   linspacelongpermuter   r   interpolater   r   r   r   rS   )r/   r   r   r   r   r   r   r   r   r   r   r   raftr   	vid_uint8FRAMEFLOWLATENT_SCALEwarpedindiceswarped_tensors                        r   rY   VOIDWarpedNoise.execute6  sp    -V4$OOT
 %FaZ$88A=Q;A: ''88:--l;|11&AGVn'kk((KKAz8

'!R. 	 YYq!_s*..u{{;	%"54<0<?
 <<?h&nnQQ!(;X,2448DF G_F<<?h&&,,q/X*E^^Aq!Q/FXX((44h1u 5 F ^^Aq!Q/F q!Q2<<Q?>)00Q1aHM%(()?)?)S)S)UV}}i788r   r[   Nr\   r[   r   r   r   r     s@     
 
, @9PRP]P] @9 @9r   r   c                   $    \ rS rSrSrS rS rSrg)Noise_FromLatentiz  z5Wraps a pre-computed LATENT tensor as a NOISE source.c                 &    SU l         US   U l        g )Nr   r   )seed_samples)selflatent_dicts     r   __init__Noise_FromLatent.__init__|  s    	#I.r   c                 R    U R                   R                  5       R                  5       $ N)r   r   cpu)r   input_latents     r   generate_noiseNoise_FromLatent.generate_noise  s    }}""$((**r   )r   r   N)r]   r^   r_   r`   ra   r   r  rc   r[   r   r   r   r   z  s    ?/+r   r   c                   T    \ rS rSrSr\S 5       r\S\R                  4S j5       r	Sr
g)VOIDWarpedNoiseSourcei  zdConvert a LATENT (e.g. from VOIDWarpedNoise) into a NOISE source
for use with SamplerCustomAdvanced.c           	          [         R                  " SS[         R                  R                  SSS9/[         R                  R                  5       /S9$ )Nr  r   r   z(Warped noise latent from VOIDWarpedNoiser   r   )r   r'   r   r)   Noiser-   r.   s    r   r0   #VOIDWarpedNoiseSource.define_schema  sL    yy+(		F   H XX__&'
 	
r   r   c                 @    [         R                  " [        U5      5      $ r  )r   rS   r   )r/   r   s     r   rY   VOIDWarpedNoiseSource.execute  s    }}-l;<<r   r[   Nr\   r[   r   r   r  r    s:    + 	
 	
 =bmm = =r   r  c                   "    \ rS rSrSrSS jrSrg)	VOID_DDIMi  u  DDIM sampler for VOID inpainting models.

VOID was trained with the diffusers CogVideoXDDIMScheduler which operates in
alpha-space (input std ≈ 1). The standard KSampler applies noise_scaling that
multiplies by sqrt(1+sigma^2) ≈ 4500x, which is incompatible with VOID's
training. This sampler skips noise_scaling and implements the DDIM update rule
directly using sigma-to-alpha conversion.
Nc	           	         UR                  [        R                  5      n	UR                  S0 5      n
UR                  SS 5      nU	R	                  U	R
                  S   /5      n[        [        U5      S-
  US9 Hz  nX-   nX-S-      nU" XU-  XS9nUb  U" UUU	[        U5      S-
  5        US:X  a  Un	M<  SSUS-  -   -  nSSUS-  -   -  nU	US	-  U-  -
  SU-
  S	-  -  nUS	-  U-  SU-
  S	-  U-  -   n	M|     U	$ )
Nmodel_optionsr   r   r   )disable)r  r   ru   r   r{   )rK   rL   rM   getnew_onesr   trangelen)r   
model_wrapsigmas
extra_argscallbacknoiselatent_imagedenoise_maskdisable_pbarxr  r   s_inisigma
sigma_nextdenoisedalpha_t
alpha_prevpred_epss                       r   sampleVOID_DDIM.sample  s"   HHU]]#";~~fd+zz1771:,'Fa>AIEAJ!!T\ZH#HaVq9Quz!12 C*/$9:
C8 ;;gRU?UU3&(2cJ6F35NQY5YY! ?$ r   r[   )NNF)r]   r^   r_   r`   ra   r(  rc   r[   r   r   r  r    s    r   r  c                   X    \ rS rSrSr\S 5       r\S\R                  4S j5       r	\	r
Srg)VOIDSampleri  a6  VOID DDIM sampler for use with SamplerCustom / SamplerCustomAdvanced.

Required for VOID inpainting models. Implements the same DDIM loop that VOID
was trained with (diffusers CogVideoXDDIMScheduler), without the noise_scaling
that the standard KSampler applies. Use with RandomNoise or VOIDWarpedNoiseSource.
c                 j    [         R                  " SS/ [         R                  R                  5       /S9$ )Nr+  zmodel/sampling/samplersr   )r   r'   Samplerr-   r.   s    r   r0   VOIDSampler.define_schema  s/    yy!.ZZ&&()	
 	
r   r   c                 >    [         R                  " [        5       5      $ r  )r   rS   r  r.   s    r   rY   VOIDSampler.execute  s    }}Y[))r   r[   N)r]   r^   r_   r`   ra   rb   r0   r   rS   rY   get_samplerrc   r[   r   r   r+  r+    sA     
 
 * * * Kr   r+  c                   L    \ rS rSr\S\\\R                        4S j5       r	Sr
g)VOIDExtensioni  r   c                 J   #    [         [        [        [        [        [
        /$ 7fr  )r   re   r   r   r  r+  )r   s    r   get_node_listVOIDExtension.get_node_list  s#      "#!
 	
s   !#r[   N)r]   r^   r_   r`   r   listtyper   	ComfyNoder5  rc   r[   r   r   r3  r3    s)    
T$r||*<%= 
 
r   r3  c                     #    [        5       $ 7fr  )r3  r[   r   r   comfy_entrypointr;    s     ?s   ))r   rL   rD   comfy.model_managementcomfy.model_patchercomfy.samplerscomfy.utilsr*   r   r   r   r  comfy_api.latestr   r   torchvision.models.optical_flowr   typing_extensionsr   comfy_extras.void_noise_warpr   r	   Customr,   r   r   intr   r9  r   re   r   r   r   r  samplersr-  r  r+  r3  r;  r[   r   r   <module>rG     s             . / 6 & Oii' <s <s <(:& :&z7 R\\ 7 tnLbll nLb`9bll `9F+ +=BLL =*"&& "J",, 0

N 

 r   