
    3jN                     :   S r SSKJr  SSKrSSKJs  Jr  SSKrSSK	r	SSK
rSSKrSSKJrJr  SSKJr  \R$                  " S5      r/ SQrS rS	 rS
 rSS jrSS jrS r " S S\R6                  5      r " S S\R6                  5      r " S S\5      rS\4S jrg)zSCAIL / SCAIL-2 nodes: the WanSCAILToVideo conditioning node and the SAM3
preprocessing that turns video tracks into the bundle the SCAIL-2 model consumes.    )overrideN)ComfyExtensionio)unpack_masksSAM3_TRACK_DATA))        r         ?)r	   r   r   )r   r	   r   )r	   r   r	   )r   r	   r	   )r	   r	   r   c                 P    U S   nUb  UR                   S   S:X  a  g [        U5      $ )Npacked_masks   r   )shaper   )
track_datapackeds     8/home/wildlama/comfy/ComfyUI/comfy_extras/nodes_scail.py_unpackr      s.    'F~aA-    c                 .   U R                  5       nUR                  S   UR                  S   UR                  S   pCn[        R                  " XAR                  UR
                  S9R                  SSSU5      nUR                  SS9nX-  R                  SS9UR                  SS9-  nUS:  n[        R                  " X!R                  S	9R                  S5      n	[        R                  " XU5      R                  SS9n
U
R                  US-
  S
9R                  S5      nUR                  SU5      R                  S5      nUR                  SU5      R                  S5      nU
R                  5       X-  R                  5       XU-  -  R                  5       4$ )zNPer object: first frame it appears in, plus centroid-x and area in that frame.r   devicedtyper   )r   r   dim)minr   )max)floatr   torcharanger   r   viewsumclamp	unsqueezewhereamingathersqueezetolist)
masks_boolmTHWgrid_xarea_tcx_tpresent	frame_idxfirst_tselcxareas                 r   _first_appearance_cx_arear8   %   sV   Aggaj!''"+qwwr{!A\\!HHAGG<AA!Q1MFUUxU FJ)FLLQL,??DqjGQxx0::1=Ikk'a055!5<G
--AE-
"
,
,Q
/C	Q		$	$Q	'B==C ((+D>>bf__.Q0G0G0IIIr   c                    [        U 5      nU S   nUb  U(       d  S US'   SU;   a  / US'   U$ US S 2U4   R                  5       US'   U R                  S5      nUb(  U Vs/ s H  oU[        U5      :  d  M  XE   PM     snUS'   U$ s  snf )Nr   scores)dict
contiguousgetlen)r   obj_indicesoutr   r:   is         r   _subset_track_datarB   5   s    
z
C'F~["Ns?CM
 K0;;=C^^H%F,7KKqs6{?KKHJ Ls   B5Bc           
         U S   nU S   u  p4[         R                  R                  5       n[         R                  R                  5       nUR	                  S5      (       a  SOSnUb  UR
                  S   S:X  aV  Uc  U R                  SS5      OUR
                  S   n[        R                  " XUS	XVS
9n	US   US   US   sU	S'   U	S'   U	S'   U	$ UR
                  S   UR
                  S   p[        R                  " [        U
5       Vs/ s H  n[        U[        [        5      -     PM     snXVS
9n[        UR                  U5      5      R                  5       nUR
                  S   UR
                  S   p[         R"                  " UR%                  X-  SX5      X44SS9R%                  XX45      S:  nUR'                  SS9nXR                  [        R(                  5      R+                  SS9   n[        R                  " XuUR,                  S
9R%                  SSSS	5      n[        R.                  " UR1                  S5      UUR3                  U5      5      $ s  snf )Nr   	orig_sizewhiter	   r	   r	   r   r   r   r   r   n_frames   r      ).r   ).r   ).rJ   r   r   nearestsizemode      ?r   )comfymodel_managementintermediate_deviceintermediate_dtype
startswithr   r=   r   emptytensorrangeDEFAULT_PALETTEr>   r   tor   Finterpolater!   anyuint8argmaxr   r%   r$   	expand_as)r   
backgroundr   r-   r.   r   r   bg_rgbr,   r@   N_objrA   colors
masks_fullHmWmany_maskcolor_overlay	bg_tensors                      r   _render_colored_masksrj   D   s   'Fk"DA##779F""557E * 5 5g > >_OF~aA--3^JNN:q)akk!1VA06q	6!9fQi-FS[#f+
||AQu\\<A%LILqS11	2LIF fii/0668Jb!:#3#3B#7	1b-QF
d1QS!J ~~!~$H==5<<<CDMV-:M:MNSSTUWXZ[]^_I;;x))"-}i>Q>QR_>`aa 	Js   	#Ic           
      n   [         R                  R                  5       n[         R                  R                  5       nU R                  S:X  a  U R                  S5      n U R                  X#S9n U R                  u  pEnUR                  S5      (       a  SOSn[        R                  " [        S   X#S9R                  SSSS5      n[        R                  " XrUS9R                  SSSS5      n	[        R                  " U S	:  R                  S
5      UR                  XEUS5      U	R                  XEUS5      5      $ )zPlain comfy MASK (B,H,W) or (H,W) -> (B,H,W,3) rendered as a single identity (palette[0])
on the given background. A batch is treated as multiple views of that one subject.rJ   r   r   rE   rF   rG   r   rI   rO   r   )rP   rQ   rR   rS   ndimr$   rY   r   rT   r   rV   rX   r!   r%   expand)
maskr`   r   r   Br-   r.   ra   colorbgs
             r   _render_mask_as_identityrr   _   s    ##779F""557EyyA~~~a 77&7.DjjGA! * 5 5g > >_OFLL+FHMMaQRTUWXYE	f5	9	>	>q!Q	JB;;s
--b15<<a3KRYYWX]^`aMbccr   c           
         U R                   u  pp4SnU R                  SS5      R                  5       nUSS2SS24   U:  R                  5       nUSS2SS24   U:  R                  5       nUSS2SS24   U:  R                  5       n	SU-
  SU-
  SU	-
  pn
[        R                  " Xx-  U	-  X{-  U-  X-  U-  X-  U	-  Xx-  U-  X{-  U	-  X-  U	-  /SS9nX#p[        S5       H  nUS-   S-  nUS-   S-  nM     [        R                  R                  R                  XU4S	S
9nUS-
  S-  S-   n[        R                  " USS R                  SSSS5      USS /SS9nUR                  USX5      nUR                  S5      $ )zColored RGB mask (T, H, W, 3) in [0, 1] -> SCAIL-2 28-channel binary latent
(1, T_lat, 28, H_lat, W_lat). 7 per-color binary channels (white/r/g/b/y/m/c)
threshold-extracted at 225/255, 8x spatial downsample, 4-frame temporal stacking.g<<<<<<?r   r   Nr   rJ   rI   r   r7   rL         )r   movedimr   r   catrW   nn
functionalr[   repeatr!   r$   )	rgb_videor,   r-   r.   _
_ON_THRESHrn   RGro   nRnGnB
binary_7chH_latW_latT_latentpaddedr@   s                      r   _extract_mask_to_28chr   n   s    JA!JR#))+D	a1f
	"))+A	a1f
	"))+A	a1f
	"))+AQAq1uBB			

!	
	


 J 51Xq q   $$00%.W]0^JA!|aHYY
2A--aAq9:ab>JPQRF
++hE
1C==r   c                   X    \ rS rSr\S 5       r\  SS\R                  4S jj5       rSr	g)WanSCAILToVideo   c                 D   [         R                  " SS[         R                  R                  S5      [         R                  R                  S5      [         R                  R                  S5      [         R
                  R                  SSS[        R                  SS	9[         R
                  R                  S
SS[        R                  SS	9[         R
                  R                  SSS[        R                  SS	9[         R
                  R                  SSSSS9[         R                  R                  SSSS9[         R                  R                  SSSS9[         R                  R                  SSSSS9[         R                  R                  SSSS S!S"S#9[         R                  R                  S$SSSS!S%S#9[         R                  R                  S&SSSS!S'S#9[         R                  R                  S(SS)S9[         R                  R                  S*SS+S9[         R                  R                  S,SS-S9[         R
                  R                  S.S/S/[        R                  SS0S#9[         R
                  R                  S1S2S[        R                  SS3S#9[         R                  R                  S4SS5S9/[         R                  R                  SS69[         R                  R                  SS69[         R                  R                  S7S8S99[         R
                  R                  S.S:S99/SS;9$ )<Nr   model/conditioning/wan/scailpositivenegativevaewidthi       )defaultr   r   stepheighti  lengthQ   r   rt   
batch_sizei   )r   r   r   
pose_videoTz^Video used for pose conditioning. Will be downscaled to half the resolution of the main video.)optionaltooltippose_video_maskzXSCAIL-2 only. Colored per-identity SAM3 mask video at the same resolution as pose_video.replacement_modeFzSCAIL-2 only. False = Animation Mode (pose_video_mask should have black background). True = Replacement Mode (pose_video_mask should have white background).)r   r   r   pose_strengthr	   r   g      $@g{Gz?zStrength of the pose latent.)r   r   r   r   r   
pose_startz$Start step of the pose conditioning.pose_endz"End step of the pose conditioning.reference_imagea	  Reference image. The first image is the primary reference (composite all identities onto it). SCAIL-2: extra batch images are used as additional views (back view, close-up, occluded background), each needing a matching reference_image_mask in that identity's color.reference_image_maskzSCAIL-2 only. Colored reference mask, batch matching reference_image (first = primary reference mask, rest = identity masks for the additional reference_image).clip_vision_outputz\CLIP vision features for conditioning. Model is trained with stretch resize to aspect ratio.video_frame_offsetr   zgCumulative output frame this chunk begins at. Wire from the previous chunk's video_frame_offset output.previous_frame_count   z`Tail frames of previous_frames to anchor. SCAIL-2 trained at 5 (81-frame chunks, 76-frame step).previous_framesz}SCAIL-2 only. Full decoded output of the previous chunk. Only the last previous_frame_count are used as the extension anchor.)display_namelatentz$Empty latent of the generation size.)r   r   z3Adjusted offset + length. Wire into the next chunk.)node_idcategoryinputsoutputsis_experimental)r   SchemaConditioningInputVaeIntnodesMAX_RESOLUTIONImageBooleanFloatClipVisionOutputOutputLatentclss    r   define_schemaWanSCAILToVideo.define_schema   s   yy%3%%j1%%j1U#Wcru?S?SZ\]Xs@T@T[]^Xrqe>R>RYZ[\1!F|d  Ee  f04  Jd  e

  !3UT  \z   {$UY  dB  C|ScsQU  `F  Gz3CSt  ^B  C04  JU  V5  Oq  r##))*>  Xv)  w11!I]I]de  pY  Z3QA5K_K_fg  rT  U04  JI  J', &&J&?&&J&?		  h@f g+?I~	 !=
 	
r   Nreturnc                    [         R                  " USUS-
  S-  S-   US-  US-  /[        R                  R	                  5       S9nS nU(       + n[
        R                  " USU05      n[
        R                  " USU05      nS nUb7  UR                  S   S:  a$  UU* S  nUUR                  S   -  n[        SU5      nUGb  [        R                  R                  UR                  SS5      XES	S
5      R                  SS5      nUR                  S   nU(       a  Ub  [        R                  R                  UR                  SS5      XESS
5      R                  SS5      nU[        U5       Vs/ s H  n[        UUR                  S   S-
  5      PM!     sn   nUSS S24   R                  SSS9R                  S:  R                  UR                   5      nUU-  n[        U5       Vs/ s H&  nUR#                  UUUS-   2S S 2S S 2S S24   5      PM(     nn[
        R                  " USU0SS9n[
        R                  " USU0SS9nUb2  [
        R                  " USU05      n[
        R                  " USU05      nUb  UR                  S   U::  a  S nOUUS  nUb  UR                  S   U::  a  S nOUUS  nUU4 Vs/ s H  nUc  M  UR                  S   PM     nnU(       a1  [        [        U5      U5      S-
  S-  S-  S-   n Ub  US U  nUb  US U  nUb  [        R                  R                  US U R                  SS5      US-  US-  SS
5      R                  SS5      nUR#                  US S 2S S 2S S 2S S24   5      U-  n![
        R$                  " USU!0X5      n[
        R$                  " USU!0X5      nUb  [        R                  R                  US U R                  SS5      US-  US-  SS
5      R                  SS5      n"['        U"5      n#[
        R                  " USU#05      n[
        R                  " USU#05      nUGbL  UGbH  [        R                  R                  UR                  SS5      XESS
5      R                  SS5      n$U$R                  S   n%UR                  S   n[        SU5       Vs/ s H!  n['        U$[        UU%S-
  5         S    5      PM#     n&n['        U$S S 5      n'[         R                  " SUR                  S   SU'R                  S   U'R                  S   4U'R(                  U'R                   S9n([         R*                  " U&U'U(/-   SS9n)[
        R                  " USU)05      n[
        R                  " USU)05      nUGb  [        R                  R                  UR                  SS5      XES	S
5      R                  SS5      n*UR#                  U*S S 2S S 2S S 2S S24   5      n+[        U+R                  S   UR                  S   5      n,U+S S 2S S 2S U,24   R                  UR                   5      US S 2S S 2S U,24'   [         R,                  " SSUR                  S   UR                  S   UR                  S   4UR(                  UR                   S9nSUS S 2S S 2S U,24'   SU0n-Ub  UU-S'   [.        R0                  " XU-X-   5      $ s  snf s  snf s  snf s  snf ) N   r   rt      r   ref_mask_flagr   r   bicubiccenterznearest-exact.rI   T)r   keepdimg?reference_latents)appendr   rJ   r7   pose_video_latentdriving_mask_28chru   r   r   r   ref_mask_28chr   samples
noise_mask)r   zerosrP   rQ   rR   node_helpersconditioning_set_valuesr   r   utilscommon_upscalerv   rW   r   valuesrY   r   encode+conditioning_set_values_with_timestep_ranger   r   rw   onesr   
NodeOutput).r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   prev_trimmedref_imgsn_refrmrA   is_charref_latentsvtsT_keptr   mask_video_hwr   ref_mask_hwn_masks	add_masksref_mask_1fr   r   pfprev_latentprev_latent_frames
out_latents.                                                 r   executeWanSCAILToVideo.execute   s    j"
q/@A.EvQR{TY]^T^_hmh~h~  iS  iS  iU  V
,,77?TaBbc77?TaBbc&?+@+@+Ca+G*,@+@+ABL,"4"4Q"77!$Q(:!;&{{11/2I2I"a2PRWajltu}}~  BD  EHNN1%E  $8$D[[//0D0L0LRQR0SUZdsu}~  G  G  HI  KM  N%,G,QQa0,GHc2A2g;??r4?@GG#MQQRZR`R`a#g-LQRWLYLq3::hqQw1bqb/@&ABLKY#;;HGZ\gFhquvH#;;HGZ\gFhquvH)#;;HG[]oFpqH#;;HG[]oFpqH!"&88!
'(:(;<
&$$Q'+=="&"12D2E"F $."?Q"?Q1jaggaj"?Q3r7F+a/A5:Q>F%'0
*"1'6":!33Jw4G4O4OPRTU4VX]abXbdjnodoqw  zB  C  K  K  LM  OQ  RJ #

:aArrk+B Cm S#OOPX[n  qB  [C  EO  ZH#OOPX[n  qB  [C  EO  ZH&!KK66w7O7W7WXZ\]7^`eij`jlrvwlwy  BJ  K  S  S  TU  WY  ZM 5m D#;;HGZ\mFnoH#;;HGZ\mFnoH  +0K++445I5Q5QRTVW5XZ_ix  {C  D  L  L  MN  PR  SK!''*G#))!,E`efgin`op`o[\.{3q'A+;N/OPT/UV`oIp/BQ@KKKFLLOR9J9J29NP[PaPabdPe foz  pB  pB  JU  J[  J[  \E!IIi;2F&FANM#;;HXeFfgH#;;HXeFfgH#++L,@,@Q,GXacklttuvxz{B**R1a!_5K"%k&7&7&:FLLO"L0;AqBUCUBU<U0V0Y0YZ`ZfZf0gF1a,,,,-Q6<<?FLL<Lfll[]N^$_hnhuhu  ~D  ~J  ~J  KJ47Jq!00001(
!'1J|$}}X=O=XYYI H Z( R4 qs   ,&[&-[$<[)[)<([. )FNNNNNN)
__name__
__module____qualname____firstlineno__classmethodr   r   r   r   __static_attributes__r   r   r   r   r      sG     
  
D  MQQU\ZZ\ZgZg\Z \Zr   r   c                   <    \ rS rSrSr\S 5       r\SS j5       rSrg)SCAIL2ColoredMaski  zRender SAM3 tracks for the driving pose video and reference image(s) into the
colored masks WanSCAILToVideo consumes. Shared `sort_by` keeps each identity on the
same color across both outputs.
c                    [         R                  " SSS[        R                  SSS9[         R                  R                  S[        [         R
                  /SS	S
S9[         R                  R                  SSSS9[         R                  R                  S/ SQSSS9[         R                  R                  SSSS9/[         R                  R                  S5      [         R                  R                  S5      /SS9$ )Nr   zCreate SCAIL-2 Colored Maskr   driving_track_datazWSAM3 track of the driving pose video. Will be rendered into the pose_video_mask output.)r   ref_track_dataTreference_maskszSAM3 track of the reference image(s) (one identity per object, colored in batch order), or a plain MASK of the reference subject (rendered as a single identity).)r   r   r   object_indices z~Comma-separated list of person indices to include (e.g. '0,2,3'). Applied to both reference and pose video masks. Empty = all.)r   r   sort_by)noneleft_to_rightr7   r   a  Order in which palette colors are assigned to the tracked objects (applied to both reference and pose video so each identity keeps the same color). Objects that appear in earlier frames always come first; within a frame, left_to_right = leftmost object (by centroid at first appearance) gets the first color, area = biggest object (by mask area at first appearance) gets the first color; none = keep SAM3's order.)optionsr   r   r   FzFalse = Animation Mode (pose_video_mask has black background, reference_image_mask has white background). True = Replacement Mode (pose_video_mask has white background, reference_image_mask has black background).r   r   )r   r   r   r   r   r   )r   r   SAM3TrackDatar   	MultiTypeMaskStringCombor   r   r   r   s    r   r   SCAIL2ColoredMask.define_schema  s   yy'63##$8  C\#  ]""#3mRWW5MX\k| ,O # P		 0" )i   jy2S]l (G  H

  !3UA ! B  12 67 !)
 	
r   Nc                   ^^ UU4S jnU" U5      n[        Xt(       a  SOS5      nU(       a  SOSn	Ub>  [        U[        R                  5      (       a  [	        XY5      n
Ov[        U" U5      U	5      n
OcUS   u  pU	S:X  a  SOSn[        R
                  " SXS4U[        R                  R                  5       [        R                  R                  5       S	9n
[        R                  " X5      $ )
Nc                   >^^^	 [        U 5      nTS:w  af  Ubc  [        U5      u  m	mmTS:X  a"  [        [        [	        T5      5      UU	4S jS9nO![        [        [	        T5      5      UU	4S jS9n[        X5      n T
R                  5       (       a  T
R                  S5       Vs/ s H@  o3R                  5       R                  5       (       d  M'  [        UR                  5       5      PMB     nnU R                  S5      nUb  UR                  S   OS	nU Vs/ s H  nS	Us=::  a  U:  d  M  O  M  UPM     nn[        X5      n U $ s  snf s  snf )
Nr   r   c                    > TU    TU    4$ Nr   )rA   r6   r4   s    r   <lambda>:SCAIL2ColoredMask.execute.<locals>._prep.<locals>.<lambda>7  s    '!*bQReATr   )keyc                    > TU    TU    * 4$ r	  r   )rA   r7   r4   s    r   r
  r  9  s    GAJQUVWQXPXCYr   ,r   r   r   )r   r8   sortedrW   r>   rB   stripsplitisdigitintr=   r   )tdr*   orderrA   indicesr   n_objr7   r6   r4   r   r   s          @@@r   _prep(SCAIL2ColoredMask.execute.<locals>._prep2  s    J& Z%;$=j$I!To-"5R>7TUE"5T#39YZE'2##%%3A3G3G3Ld3LaPWPWPYPaPaPc>3qwwy>3Ld/+1+=Q1&-@ga%11g@'4I e As   $&EEE	+E	/E	rE   blackrD   r	   r   r   rI   r   )rj   
isinstancer   Tensorrr   fullrP   rQ   rR   rS   r   r   )r   r   r   r   r   r   r  drv
mask_videoref_bgr   r-   r.   
fill_values     ``          r   r   SCAIL2ColoredMask.execute0  s    	" &'*3;KQXY
,'%.%,,77'?'W$'<U>=RTZ'[${#DA &' 1sJ#(::q!lJuOeOeOyOyO{  DI  DZ  DZ  Dm  Dm  Do  $p }}Z>>r   r   r	  )	r   r   r   r   __doc__r   r   r   r   r   r   r   r   r     s/    
 
 
0 !? !?r   r   c                   L    \ rS rSr\S\\\R                        4S j5       r	Sr
g)SCAILExtensioniU  r   c                 "   #    [         [        /$ 7fr	  )r   r   )selfs    r   get_node_listSCAILExtension.get_node_listV  s      
 	
s   r   N)r   r   r   r   r   listtyper   	ComfyNoder(  r   r   r   r   r%  r%  U  s)    
T$r||*<%= 
 
r   r%  r   c                     #    [        5       $ 7fr	  )r%  r   r   r   comfy_entrypointr.  ^  s     s   )r  ) r#  typing_extensionsr   r   torch.nn.functionalrx   ry   rZ   r   r   comfy.model_managementrP   comfy.utilscomfy_api.latestr   r   comfy.ldm.sam3.trackerr   Customr   rX   r   r8   rB   rj   rr   r   r,  r   r   r%  r.  r   r   r   <module>r6     s   U '        / /		+, J b6d>AZbll AZHA? A?H
^ 
 r   