
    +j^                     6   S SK r S SKrS SKrS SKrS SKrS SKJr  S SKrS SKr	S SK
r
S SKJr  S SKJr  S SKJrJrJr  SSKJr  SBS\S	\S
\
R,                  4S jjrS\
R,                  S\
R,                  S
\
R,                  4S jrSSS.S\
R,                  S\S-  S\S-  S
\4S jjrSCS\
R,                  S\S-  S
\R                  4S jjr  SDS\
R,                  S\S-  S\S
\4S jjrSES\R                  S\S
\4S jjrS\S\S\S
\\\4   S-  4S jrSFS\
R,                  S\S
\
R,                  4S jjr S\
R,                  S\S
\
R,                  4S  jr!  SDS\
R,                  S\S-  S\S
\4S! jjr"SGS"\RF                  S#\S$\S
\4S% jjr$  SHS&\RJ                  S#\RL                  S-  S'\RN                  S-  S
\4S( jjr(  SGS)\	RR                  S*\S#\S$\S
\4
S+ jjr*S,\
R,                  S
\	RR                  4S- jr+S"\RF                  S
\4S. jr,S&\RJ                  S/\-S
\RJ                  4S0 jr.S&\RJ                  S1\S
\RJ                  4S2 jr/S\S\S\S
\\\4   S-  4S3 jr0S&\RJ                  S4\S
\RJ                  4S5 jr1S&\RJ                  S6\\\4   S
\RJ                  4S7 jr2S8\
R,                  S
\
R,                  4S9 jr3S:\4S
\54S; jr6    SIS<\
R,                  S\
R,                  4S= jjr7S<\R                  S
\
R,                  4S> jr8S?\S
\4S@ jr9S?\S
\4SA jr:g)J    N)BytesIO)Image)common_upscale)Input	InputImplTypes   )mimetype_to_extensionimage_bytesiomodereturnc                    [         R                  " U 5      nUR                  U5      n[        R                  " U5      R                  [        R                  5      S-  n[        R                  " U5      R                  S5      $ )a  Converts image data from BytesIO to a torch.Tensor.

Args:
    image_bytesio: BytesIO object containing the image data.
    mode: The PIL mode to convert the image to (e.g., "RGB", "RGBA").

Returns:
    A torch.Tensor representing the image (1, H, W, C).

Raises:
    PIL.UnidentifiedImageError: If the image data cannot be identified.
    ValueError: If the specified mode is invalid.
g     o@r   )
r   openconvertnparrayastypefloat32torch
from_numpy	unsqueeze)r   r   imageimage_arrays       @/home/wildlama/comfy/ComfyUI/comfy_api_nodes/util/conversions.pybytesio_to_image_tensorr      s]     JJ}%EMM$E((5/((4u<KK(22155    image1image2c                     U R                   SS UR                   SS :w  aI  [        UR                  SS5      U R                   S   U R                   S   SS5      R                  SS5      n[        R                  " X4SS9$ )	z
Converts a pair of image tensors to a batch tensor.
If the images are not the same size, the smaller image is resized to
match the larger image.
r	   N   bilinearcenterr   dim)shaper   movedimr   cat)r   r   s     r   image_tensor_pair_to_batchr)   '   s|     ||AB6<<++NN2q!LLOLLO
 '!R. 	 99f%1--r     @ 	image/png)total_pixels	mime_typer   r,   r-   c                    U(       d  Sn[        XS9n[        X2S9n[        R                  " 5        S[	        U5       3Ul        U$ )a  Converts a torch.Tensor image to a named BytesIO object.

Args:
    image: Input torch.Tensor image.
    total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
    mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

Returns:
    Named BytesIO object containing the image data, with pointer set to the start of buffer.
r+   r,   r-   .)tensor_to_pilpil_to_bytesiouuiduuid4r
   name)r   r,   r-   	pil_image
img_binarys        r   tensor_to_bytesior9   8   sE      	e?I	?Ja(=i(H'IJJOr   c                 B   [        U R                  5      S:  a  U S   n U R                  5       nUb'  [        UR	                  S5      US9R                  5       nUR                  5       S-  R                  [        R                  5      n[        R                  " U5      nU$ )zVConverts a single torch.Tensor image [H, W, C] to a PIL Image, optionally downscaling.   r   r/      )lenr&   cpudownscale_image_tensorr   squeezenumpyr   r   uint8r   	fromarray)r   r,   input_tensorimage_npimgs        r   r2   r2   Q   s    
5;;!a99;L-l.D.DQ.GVbckkm""$s*22288<H
//(
#CJr   image_tensorc                     [        XS9n[        X2S9nUR                  5       n[        R                  " U5      R                  S5      nU$ )ad  Convert [B, H, W, C] or [H, W, C] tensor to a base64 string.

Args:
    image_tensor: Input torch.Tensor image.
    total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
    mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

Returns:
    Base64 encoded string of the image.
r/   r0   utf-8)r2   r3   getvaluebase64	b64encodedecode)rG   r,   r-   r7   img_byte_arr	img_bytesbase64_encoded_strings          r   tensor_to_base64_stringrQ   ^   sH     lFI!)AL%%'I",,Y7>>wG  r   rF   c                     U(       d  Sn[        5       nUR                  S5      S   R                  5       nUS:X  a  SnU R                  X#S9  UR	                  S5        U$ )z)Converts a PIL Image to a BytesIO object.r+   /r    JPGJPEGformatr   )r   splituppersaveseek)rF   r-   rN   
pil_formats       r   r3   r3   u   s\    	9L%b)//1JU
HH\H-ar   src_wsrc_hc                     X-  nX2::  a  g[         R                  " X#-  5      n[        S[        X-  5      5      n[        S[        X-  5      5      nXUS-  -  nXfS-  -  nXV4$ )a	  Return downscaled (w, h) with even dims fitting ``total_pixels``, or None if already fits.

Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions
are rounded down to even values (many  codecs require divisible-by-2).
Nr!   )mathsqrtmaxintr]   r^   r,   pixelsscalenew_wnew_hs          r   _compute_downscale_dimsri      sk     ]FIIl+,E3u}%&E3u}%&E	QYE	QYE<r   c                     U R                  SS5      n[        UR                  S   UR                  S   [        U5      5      nUc  U $ Uu  pE[	        X$USS5      R                  SS5      $ )a  Downscale input image tensor to roughly the specified total pixels.

Output dimensions are rounded down to even values so that the result is guaranteed to fit within ``total_pixels``
and is compatible with codecs that require even dimensions (e.g. yuv420p).
r    r	   r;   r!   lanczosdisabled)r'   ri   r&   rc   r   )r   r,   samplesdimsrg   rh   s         r   r?   r?      sj     mmB"G"7==#3W]]15Es<GXYD|LE'%JGOOPQSUVVr   max_sidec                   U R                  SS5      nUR                  S   UR                  S   pC[        XC5      nXQ::  a  U $ X-  n[        XF-  5      n[        X6-  5      n[	        X'USS5      n	U	R                  SS5      n	U	$ )zQDownscale input image tensor so the largest dimension is at most max_side pixels.r    r	   r!   r;   rk   rl   )r'   r&   rb   roundr   )
r   ro   rm   heightwidthmax_dimscale_by	new_width
new_heightss
             r   "downscale_image_tensor_by_max_sidery      s    mmB"GMM!$gmmA&6E% G!He&'Iv()Jw:y*MA			!RAHr   c                 *    [        XU5      nSU SU 3$ )aY  Converts a tensor image to a Data URI string.

Args:
    image_tensor: Input torch.Tensor image.
    total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
    mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp').

Returns:
    Data URI string (e.g., 'data:image/png;base64,...').
data:;base64,)rQ   )rG   r,   r-   base64_strings       r   tensor_to_data_urir~      s#     ,L	RM9+Xm_55r   audiocontainer_format
codec_namec                     U S   nU S   n[        U5      n[        XSX5      nUR                  5       n[        R                  " U5      R                  S5      $ )z+Converts an audio input to a base64 string.sample_ratewaveformrI   )"audio_tensor_to_contiguous_ndarrayaudio_ndarray_to_bytesiorJ   rK   rL   rM   )r   r   r   r   r   audio_data_npaudio_bytes_ioaudio_bytess           r   audio_to_base64_stringr      sW    ]+K":.H6x@M-mJZgN ))+KK(//88r   videocodecc           	      p   [        5       nU R                  UU=(       d%    [        U S[        R                  R
                  5      U=(       d%    [        U S[        R                  R                  5      S9  UR                  S5        [        R                  " UR                  5       5      R                  S5      $ )a  
Converts a video input to a base64 string.

Args:
    video: The video input to convert
    container_format: Optional container format to use (defaults to video.container if available)
    codec: Optional codec to use (defaults to video.codec if available)
	containerr   )rW   r   r   rI   )r   save_togetattrr   VideoContainerMP4
VideoCodecH264r[   rK   rL   rJ   rM   )r   r   r   video_bytes_ios       r   video_to_base64_stringr      s     YN	MMX75+u?S?S?W?W#XEwugu/?/?/D/DE  
 N3356==gFFr   r   r   c                    [        5       n[        R                  " USUS9 nUR                  X1S9n[        R                  R                  U SU R                  S   S:  a  SOSS	9nXl        SUl        UR                  U5       H  nUR                  U5        M     UR                  S
5       H  nUR                  U5        M     S
S
S
5        UR                  S5        U$ ! , (       d  f       N!= f)z<
Encodes a numpy array of audio data into a BytesIO object.
wr   rW   ratefltpr   r	   stereomonorW   layoutN)r   avr   
add_stream
AudioFramefrom_ndarrayr&   r   ptsencodemuxr[   )	r   r   r   r   r   output_containeraudio_streamframepackets	            r   r   r      s     YN	c2B	CGW'22:2P**,221598v + 

 (	"))%0F  ( 1 #))$/F  ( 0 
D" % 
D	Cs   B#C
C-r   c                    U R                   S:w  d  U R                  S   S:w  a  [        S5      eU R                  S   S:  a  U S   n U R                  S5      R	                  5       R                  5       R                  5       nUR                  [        R                  :w  a  UR                  [        R                  5      nU$ )a+  
Prepares audio waveform for av library by converting to a contiguous numpy array.

Args:
    waveform: a tensor of shape (1, channels, samples) derived from a Comfy `AUDIO` type.

Returns:
    Contiguous numpy array of the audio waveform. If the audio was batched,
        the first item is taken.
r;   r   r	   z5Expected waveform tensor shape (1, channels, samples))ndimr&   
ValueErrorr@   r>   
contiguousrA   dtyper   r   r   )r   r   s     r   r   r     s     }}X^^A.!3PQQ ~~a1A; $$Q'++-88:@@BMbjj(%,,RZZ8r   c                 d   U S   R                  5       n[        5       n[        R                  " USSS9nUR	                  SU S   S9nSUl        [        R                  R                  UR                  S	S
5      R                  S
S5      R                  5       R                  5       SUR                  S	   S
:X  a  SOSS9nU S   Ul        S	Ul        UR                  UR!                  U5      5        UR                  UR!                  S 5      5        UR#                  5         UR%                  S	5        U$ )Nr   r   mp3r   
libmp3lamer   r   i  r   r	   r    fltr   r   r   )r>   r   r   r   r   bit_rater   r   r'   reshapefloatrA   r&   r   r   r   r   closer[   )r   r   output_bufferr   
out_streamr   s         r   audio_input_to_mp3r     s   Z $$&HIMww}3uE!,,\m@T,UJ JMM&&A&&q"-335;;=!*a/vX ' E
 m,EEI**512**401qr   duration_secc                    [        5       nSnSn U R                  5       n[        R                  " USS9n[        R                  " USSS9nSnSnUR                   GHS  n[
        R                  " SUR                  [        U5      5        [        U[        R                  5      (       a|  UR                  SUR                  S	9nUR                  Ul        UR                  Ul        S
Ul        [
        R                  " SUR                  UR                  UR                  5        M  [        U[        R                  5      (       d  M  UR                  SUR                   S	9nUR                   Ul        UR"                  Ul        [
        R                  " SUR                   UR$                  5        GMV     UR                  R&                  S   R                  n	[)        X-  5      n
U
S-  S-  nUS:X  a  [+        S5      eSnSnU(       a  UR-                  SS9 H8  nX:  a    O2UR/                  U5       H  nUR1                  U5        M     US-  nM:     UR/                  5        H  nUR1                  U5        M     [
        R                  " SX5        U(       a  UR3                  S5        UR-                  SS9 HC  nUR4                  U:  a    O2UR/                  U5       H  nUR1                  U5        M     US-  nME     UR/                  5        H  nUR1                  U5        M     [
        R                  " SU5        UR7                  5         UR7                  5         UR3                  S5        [8        R:                  " U5      $ ! [<         aC  nUb  UR7                  5         Ub  UR7                  5         [?        S[A        U5       35      UeSnAff = f)a1  
Returns a new VideoInput object trimmed from the beginning to the specified duration,
using av to avoid loading entire video into memory.

Args:
    video: Input video to trim
    duration_sec: Duration in seconds to keep from the beginning

Returns:
    VideoFromFile object that owns the output buffer
Nrr   r   mp4r   zFound stream: type=%s, class=%sh264r   yuv420pz!Added video stream: %sx%s @ %sfpsaacz%Added audio stream: %sHz, %s channelsr      z7Video too short: need at least 16 frames for Moonvalleyr   r	   z$Encoded %s video frames (target: %s)r   zEncoded %s audio frameszFailed to trim video: )!r   get_stream_sourcer   r   streamslogginginfotype
isinstanceVideoStreamr   average_raters   rr   pix_fmtAudioStreamr   r   channelsr   rc   r   rM   r   r   r[   timer   r   VideoFromFile	ExceptionRuntimeErrorstr)r   r   r   input_containerr   input_sourcevideo_streamr   streamfpsestimated_framestarget_framesframe_countaudio_frame_countr   r   es                    r   
trim_videor   6  sh    IMOYE ..0 '',S977=s5I %--FLL:FKKfV&"..11/::6H[H[:\%+\\"&,mm#'0$@&,,PVP]P]_e_r_rsFBNN33/::5vGYGY:Z+1+=+=(&,mm#DfFXFXZ`ZiZij ." %%++A.;;|12)R/25AVWW (//a/8/ +11%8F$((0 9q  9 '--/ $$V, 0 LL?\   #(//a/8::- +11%8F$((0 9!Q&! 9 '--/ $$V, 0 LL24EF 	  	1&&}55 E&!!#'""$3CF8<=1DEs   D2N H>N 
O>OO
max_pixelsc                 ^    U R                  5       u  p#[        X#U5      nUc  U $ [        X5      $ )a  Downscale a video to fit within ``max_pixels`` (w * h), preserving aspect ratio.

Returns the original video object untouched when it already fits. Preserves frame rate, duration, and audio.
Aspect ratio is preserved up to a fraction of a percent (even-dim rounding).
)get_dimensionsri   _apply_video_scale)r   r   r]   r^   
scale_dimss        r   downscale_video_to_max_pixelsr     s6     '')LE(zBJe00r   c                     X-  nX2:  a  g[         R                  " X#-  5      n[         R                  " X-  5      n[         R                  " X-  5      nUS-  (       a  US-  nUS-  (       a  US-  nXV4$ )aO  Return upscaled (w, h) with even dims meeting at least ``total_pixels``, or None if already large enough.

Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions
are rounded up to even values (many codecs require divisible-by-2). The result is guaranteed to be at
least ``total_pixels``.
Nr!   r	   )r`   ra   ceilrd   s          r   _compute_upscale_dimsr     sm     ]FIIl+,EIIem$EIIem$Eqy
qy
<r   
min_pixelsc                 ^    U R                  5       u  p#[        X#U5      nUc  U $ [        X5      $ )a  Upscale a video to meet at least ``min_pixels`` (w * h), preserving aspect ratio.

Returns the original video object untouched when it already meets the minimum. Preserves frame rate,
duration, and audio. Aspect ratio is preserved up to a fraction of a percent (even-dim rounding).
Note: upscaling a low-resolution source does not add real detail; downstream model quality may suffer.
)r   r   r   )r   r   r]   r^   r   s        r   upscale_video_to_min_pixelsr     s6     '')LE&uZ@Je00r   r   c                    Uu  p#[        5       nSnSnU R                  5       u  px[        U=(       d    U5      n	 U R                  5       n
[        R
                  " U
SS9n[        R
                  " USSS9nUR                  SU R                  5       S9nX+l        X;l	        S	Ul
        SnUR                   H`  n[        U[        R                  5      (       d  M$  UR                  S
UR                  S9nUR                  Ul        UR                  Ul          O   UR                  R                   S   nU	(       a  [#        X~R$                  -  5      OSnU(       a  [#        Xx-   UR$                  -  5      OSnU(       a  UR'                  XS9  SnUR)                  SS9 H  nU	(       a4  UR*                  b  UR*                  U:  a  M)  Ub  UR*                  U:  a    OmUR-                  X#S	S9n[        R.                  R1                  UR3                  S	S9S	S9nUR5                  U5       H  nUR7                  U5        M     US-  nM     UR5                  5        H  nUR7                  U5        M     US:X  a  [9        SU SU S35      eUb  UR'                  S5        UR)                  SS9 Ht  nU	(       a:  UR:                  b  UR:                  U:  a  M)  U(       a  UR:                  Xx-   :  a    O4SUl        UR5                  U5       H  nUR7                  U5        M     Mv     UR5                  5        H  nUR7                  U5        M     UR=                  5         UR=                  5         UR'                  S5        [>        R@                  " U5      $ ! [B         aC  nUb  UR=                  5         Ub  UR=                  5         [E        S[G        U5       35      UeSnAff = f)zNRe-encode ``video`` scaled to ``scale_dims`` with a single decode/encode pass.Nr   r   r   r   r   r   r   r   r   r   )r   r   )rs   rr   rW   rV   r	   z&resize produced no frames (start_time=z, duration=z" selected nothing from the source)r   zFailed to resize video: )$r   get_active_trim_windowboolr   r   r   r   get_frame_raters   rr   r   r   r   r   r   r   r   rc   	time_baser[   rM   r   reformat
VideoFramer   
to_ndarrayr   r   r   r   r   r   r   r   r   r   )r   r   out_wout_hr   r   r   
start_timedurationtrimmingr   r   r   r   in_video	start_ptsend_ptsencodedr   r   audio_framer   s                         r   r   r     s   LEIMO !779JJ*(+HHG..0'',S977=s5I'226@T@T@V2W"#(%--F&"..11/::5vGYGY:Z+1+=+=(&,mm# . #**003<DC
%7%778!	GO#z,0B0BBCUY   <$++!+4E99$		I(=&599+?NNYNOE MM..u/?/?y/?/QZc.dE&--e4 $$V, 5qLG 5 #))+F  ( , a<8KPXz Z4 4 
 #  #.55A5>"''/;3C3Cj3P K$4$4z7L$L"&*11+>F$((0 ?  ? '--/ $$V, 0 	 1&&}55 G&!!#'""$5c!fX>?QFGs   BN J7N 
O>OOwavc                 0   U R                   R                  (       a  U $ U R                   [        R                  :X  a  U R	                  5       S-  $ U R                   [        R
                  :X  a  U R	                  5       S-  $ [        SU R                    35      e)zOConvert audio to float 32 bits PCM format. Copy-paste from nodes_audio.py file.i   l        zUnsupported wav dtype: )r   is_floating_pointr   int16r   int32r   )r   s    r   _f32_pcmr  (  sm    
yy""
	ekk	!yy{e$$	ekk	!yy{e$$
.syyk:
;;r   r   c                    [         R                  " [        U 5      5       nUR                  R                  (       d  [        S5      eUR                  R                  S   n[        UR                  R                  5      nUn/ nUR                  =(       d    SnUR                  UR                  S9 H  nUR                  5       n[        R                  " U5      n	U	R                  S:X  a  U	R!                  S5      n	OU	R"                  S   U:w  a4  U	R"                  S   U:X  a!  U	R%                  SS5      R'                  5       n	OAU	R"                  S   U:w  a.  U	R)                  SU5      R+                  5       R'                  5       n	UR-                  U	5        M     SSS5        W(       d  [        S5      e[        R.                  " USS9n
[1        U
5      n
U
R!                  S5      R'                  5       WS	.$ ! , (       d  f       Nb= f)
z
Decode any common audio container from bytes using PyAV and return
a Comfy AUDIO dict: {"waveform": [1, C, T] float32, "sample_rate": int}.
z"No audio stream found in response.r   r	   )r   r    NzDecoded zero audio frames.r$   )r   r   )r   r   r   r   r   r   rc   codec_contextr   r   rM   indexr   r   r   r   r   r&   	transposer   r   tappendr(   r  )r   afr   in_srout_srframes
n_channelsr   arrbufr   s              r   audio_bytes_to_audio_inputr  3  s   
 
%	&"zzABB!!!$F((445%'__)
YYv||Y4E""$C""3'Cxx1}mmA&1+		"0KmmAq)4461+kk"j1335@@BMM# 5 
', 566
))F
"C
3-Ca(335fMM7 
'	&s   E9G55
Hmaskc                    UR                   u  pgpU R                  S5      n U R                  SS5      n [        XXrUS9n U R                  SS5      n U(       d  U R	                  S5      n U(       d  U S:  R                  5       n U $ )zaResize mask to be the same dimensions as an image, while maintaining proper format for API calls.r    r	   )rs   rr   upscale_methodcropg      ?)r&   r   r'   r   r@   r   )	r  r   r  r  allow_gradientadd_channel_dim_rr   rs   s	            r   resize_mask_to_imager  V  sz      ++Au>>"D<<AD$F`deD<<2D||Bs
!!#Kr   c                 V    U R                  S5      n [        R                  " U /S-  SS9$ )z]Make mask have the expected amount of dims (4) and channels (3) to be recognized as an image.r    r;   r$   )r   r   r(   )r  s    r   convert_mask_to_imager  k  s'    >>"D99dVaZR((r   filepathc                     [        U S5       nUR                  5       nSSS5        [        R                  " W5      R	                  S5      $ ! , (       d  f       N3= f)z(Converts a text file to a base64 string.rbNrI   )r   readrK   rL   rM   )r  ffile_contents      r   text_filepath_to_base64_stringr$  q  sC    	h	vvx 
L)0099 
	s   A
Ac                 b    [        U 5      n[        R                  " U 5      u  p#Uc  SnSU SU 3$ )z#Converts a text file to a data URI.zapplication/octet-streamr{   r|   )r$  	mimetypes
guess_type)r  r}   r-   r  s       r   text_filepath_to_data_urir(  x  s<    28<M''1LI.	9+Xm_55r   )RGBA)r*   )r*   r+   )r+   )i   )r   r   )NN)znearest-exactrl   TF);rK   r   r`   r&  r4   ior   r   rA   r   r   PILr   comfy.utilsr   comfy_api.latestr   r   r   _helpersr
   r   Tensorr   r)   rc   r9   r2   rQ   r3   tupleri   r?   ry   r~   Audior   Videor   r   r   ndarrayr   r   r   r   r   r   r   r   r   r  bytesdictr  r  r  r$  r(   r   r   <module>r7     sC         	    & 4 4 +67 6# 65<< 6(.u|| .U\\ .ell .(  +'	<< * Tz	
 2
 
S4Z 
RWR]R] 
  + !,,!*! ! 		!.  g 3 s # %PSUXPX/\`J` "W%,, Wc WTYT`T` Well  QVQ]Q] "  + 6,,6*6 6 		6&9%++ 9 9Z] 9jm 9 59%)G;;G**T1G d"G 		G4 "	::  	
 > "** 4ekk g .iEekk iE iE5;; iEX
1 
1# 
1%++ 
1 S  cSVhZ^H^ (1u{{ 1 1 1TGekk TGuS#X TG5;; TGn<%,, <5<< < NE  Nd  NL #	
,,<<*) ) ):S :S :6 6 6r   