
    +j^                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZ	d dl
Z
d dlmZ d dlmZ d dlmZmZmZ ddlmZ dJd	ed
ede
j        fdZde
j        de
j        de
j        fdZdddde
j        dedz  dedz  defdZdKde
j        dedz  dej        fdZ	 	 dLde
j        dedz  dedefdZdMdej        dedefdZdedededeeef         dz  fdZdNde
j        dede
j        fd Z de
j        d!ede
j        fd"Z!	 	 dLde
j        dedz  dedefd#Z"dOd&ej#        d'ed(edefd)Z$	 	 dPd*ej%        d'ej&        dz  d+ej'        dz  defd,Z(	 	 dOd-e	j)        d.ed'ed(edef
d/Z*d0e
j        de	j)        fd1Z+d&ej#        defd2Z,d*ej%        d3e-dej%        fd4Z.d*ej%        d5edej%        fd6Z/dedededeeef         dz  fd7Z0d*ej%        d8edej%        fd9Z1d*ej%        d:eeef         dej%        fd;Z2d<e
j        de
j        fd=Z3d>e4de5fd?Z6	 	 	 	 dQdDe
j        de
j        fdEZ7dDej        de
j        fdFZ8dGedefdHZ9dGedefdIZ:dS )R    N)BytesIO)Image)common_upscale)Input	InputImplTypes   )mimetype_to_extensionRGBAimage_bytesiomodereturnc                 
   t          j        |           }|                    |          }t          j        |                              t          j                  dz  }t          j        |          	                    d          S )a  Converts image data from BytesIO to a torch.Tensor.

    Args:
        image_bytesio: BytesIO object containing the image data.
        mode: The PIL mode to convert the image to (e.g., "RGB", "RGBA").

    Returns:
        A torch.Tensor representing the image (1, H, W, C).

    Raises:
        PIL.UnidentifiedImageError: If the image data cannot be identified.
        ValueError: If the specified mode is invalid.
    g     o@r   )
r   openconvertnparrayastypefloat32torch
from_numpy	unsqueeze)r   r   imageimage_arrays       @/home/wildlama/comfy/ComfyUI/comfy_api_nodes/util/conversions.pybytesio_to_image_tensorr      se     J}%%EMM$E(5//((44u<KK((221555    image1image2c                    | j         dd         |j         dd         k    rQt          |                    dd          | j         d         | j         d         dd                              dd          }t          j        | |fd          S )	z
    Converts a pair of image tensors to a batch tensor.
    If the images are not the same size, the smaller image is resized to
    match the larger image.
    r	   N   bilinearcenterr   dim)shaper   movedimr   cat)r   r   s     r   image_tensor_pair_to_batchr*   '   s     |ABB6<+++NN2q!!LOLO
 
 '!R.. 	 9ff%1----r     @ 	image/png)total_pixels	mime_typer   r-   r.   c                    |sd}t          | |          }t          ||          }t          j                     dt	          |           |_        |S )a  Converts a torch.Tensor image to a named BytesIO object.

    Args:
        image: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

    Returns:
        Named BytesIO object containing the image data, with pointer set to the start of buffer.
    r,   r-   r.   .)tensor_to_pilpil_to_bytesiouuiduuid4r
   name)r   r-   r.   	pil_image
img_binarys        r   tensor_to_bytesior:   8   s_        	e,???I	Y???JJJ(=i(H(HJJJOr   c                 n   t          | j                  dk    r| d         } |                                 }|6t          |                    d          |                                          }|                                dz                      t          j	                  }t          j        |          }|S )zVConverts a single torch.Tensor image [H, W, C] to a PIL Image, optionally downscaling.   r   Nr0      )lenr'   cpudownscale_image_tensorr   squeezenumpyr   r   uint8r   	fromarray)r   r-   input_tensorimage_npimgs        r   r3   r3   Q   s    
5;!a99;;L-l.D.DQ.G.GVbccckkmm""$$s*2228<<H
/(
#
#CJr   image_tensorc                     t          | |          }t          ||          }|                                }t          j        |                              d          }|S )a  Convert [B, H, W, C] or [H, W, C] tensor to a base64 string.

    Args:
        image_tensor: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp', 'video/mp4').

    Returns:
        Base64 encoded string of the image.
    r0   r1   utf-8)r3   r4   getvaluebase64	b64encodedecode)rH   r-   r.   r8   img_byte_arr	img_bytesbase64_encoded_strings          r   tensor_to_base64_stringrR   ^   s^     lFFFI!)yAAAL%%''I",Y77>>wGG  r   rG   c                     |sd}t                      }|                    d          d                                         }|dk    rd}|                     ||           |                    d           |S )z)Converts a PIL Image to a BytesIO object.r,   /r!   JPGJPEGformatr   )r   splituppersaveseek)rG   r.   rO   
pil_formats       r   r4   r4   u   sz      	99L%%b)//11JU
HH\*H---ar   src_wsrc_hc                     | |z  }||k    rdS t          j        ||z            }t          dt          | |z                      }t          dt          ||z                      }||dz  z  }||dz  z  }||fS )a  Return downscaled (w, h) with even dims fitting ``total_pixels``, or None if already fits.

    Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions
    are rounded down to even values (many  codecs require divisible-by-2).
    Nr"   )mathsqrtmaxintr^   r_   r-   pixelsscalenew_wnew_hs          r   _compute_downscale_dimsrj      s     U]FtIlV+,,E3uu}%%&&E3uu}%%&&E	UQYE	UQYE%<r      c                     |                      dd          }t          |j        d         |j        d         t          |                    }|| S |\  }}t	          |||dd                               dd          S )a  Downscale input image tensor to roughly the specified total pixels.

    Output dimensions are rounded down to even values so that the result is guaranteed to fit within ``total_pixels``
    and is compatible with codecs that require even dimensions (e.g. yuv420p).
    r!   r	   r<   r"   Nlanczosdisabled)r(   rj   r'   rd   r   )r   r-   samplesdimsrh   ri   s         r   r@   r@      sy     mmB""G"7=#3W]15Es<GXGXYYD|LE5'5%JGGOOPQSUVVVr   max_sidec                :   |                      dd          }|j        d         |j        d         }}t          ||          }||k    r| S ||z  }t          ||z            }t          ||z            }t	          |||dd          }	|	                     dd          }	|	S )zQDownscale input image tensor so the largest dimension is at most max_side pixels.r!   r	   r"   r<   rm   rn   )r(   r'   rc   roundr   )
r   rq   ro   heightwidthmax_dimscale_by	new_width
new_heightss
             r   "downscale_image_tensor_by_max_sider{      s    mmB""GM!$gmA&6EF%  G('!Heh&''Iv())Jw	:y*MMA			!RAHr   c                 4    t          | ||          }d| d| S )au  Converts a tensor image to a Data URI string.

    Args:
        image_tensor: Input torch.Tensor image.
        total_pixels: Maximum total pixels for downscaling. If None, no downscaling is performed.
        mime_type: Target image MIME type (e.g., 'image/png', 'image/jpeg', 'image/webp').

    Returns:
        Data URI string (e.g., 'data:image/png;base64,...').
    data:;base64,)rR   )rH   r-   r.   base64_strings       r   tensor_to_data_urir      s+     ,L,	RRM5955m555r   mp4aacaudiocontainer_format
codec_namec                     | d         }| d         }t          |          }t          ||||          }|                                }t          j        |                              d          S )z+Converts an audio input to a base64 string.sample_ratewaveformrJ   )"audio_tensor_to_contiguous_ndarrayaudio_ndarray_to_bytesiorK   rL   rM   rN   )r   r   r   r   r   audio_data_npaudio_bytes_ioaudio_bytess           r   audio_to_base64_stringr      sh    ]+K":.H6x@@M-m[JZ\fggN ))++KK((//888r   videocodecc           	      j   t                      }|                     ||pt          | dt          j        j                  |pt          | dt          j        j                             |                    d           t          j
        |                                                              d          S )a  
    Converts a video input to a base64 string.

    Args:
        video: The video input to convert
        container_format: Optional container format to use (defaults to video.container if available)
        codec: Optional codec to use (defaults to video.codec if available)
    	containerr   )rX   r   r   rJ   )r   save_togetattrr   VideoContainerMP4
VideoCodecH264r\   rL   rM   rK   rN   )r   r   r   video_bytes_ios       r   video_to_base64_stringr      s     YYN	MMX75+u?S?W#X#XEwugu/?/DEE    
 N335566==gFFFr   r   r   c                    t                      }t          j        |d|          5 }|                    ||          }t          j                            | d| j        d         dk    rdnd	          }||_        d|_        |	                    |          D ]}|
                    |           |	                    d
          D ]}|
                    |           	 d
d
d
           n# 1 swxY w Y   |                    d           |S )zD
    Encodes a numpy array of audio data into a BytesIO object.
    wr   rX   ratefltpr   r	   stereomonorX   layoutN)r   avr   
add_stream
AudioFramefrom_ndarrayr'   r   ptsencodemuxr\   )	r   r   r   r   r   output_containeraudio_streamframepackets	            r   r   r      sg    YYN	c2B	C	C	C )GW'22:K2PP**,2159988v + 
 

 (	"))%00 	) 	)F  (((( #))$// 	) 	)F  ((((	)) ) ) ) ) ) ) ) ) ) ) ) ) ) )" s   B5C((C,/C,r   c                    | j         dk    s| j        d         dk    rt          d          | j        d         dk    r| d         } |                     d                                                                                                          }|j        t          j	        k    r|
                    t          j	                  }|S )aG  
    Prepares audio waveform for av library by converting to a contiguous numpy array.

    Args:
        waveform: a tensor of shape (1, channels, samples) derived from a Comfy `AUDIO` type.

    Returns:
        Contiguous numpy array of the audio waveform. If the audio was batched,
            the first item is taken.
    r<   r   r	   z5Expected waveform tensor shape (1, channels, samples))ndimr'   
ValueErrorrA   r?   
contiguousrB   dtyper   r   r   )r   r   s     r   r   r     s     }X^A.!33PQQQ ~a1A; $$Q''++--88::@@BBMbj((%,,RZ88r   c                    | d                                          }t                      }t          j        |dd          }|                    d| d                   }d|_        t          j                            |                    d	d
          	                    d
d          
                                                                d|j        d	         d
k    rdnd          }| d         |_        d	|_        |                    |                    |                     |                    |                    d                      |                                 |                    d	           |S )Nr   r   mp3r   
libmp3lamer   r   i  r   r	   r!   fltr   r   r   )r?   r   r   r   r   bit_rater   r   r(   reshapefloatrB   r'   r   r   r   r   closer\   )r   r   output_bufferr   
out_streamr   s         r   audio_input_to_mp3r     sU   Z $$&&HIIMw}3uEEE!,,\m@T,UUJ JM&&A&&q"--3355;;==!*a//vvX '  E
 m,EEI**511222**400111qr   duration_secc                 n   t                      }d}d}	 |                                 }t          j        |d          }t          j        |dd          }d}d}|j        D ]}t          j        d|j        t          |                     t          |t          j	                  rb|
                    d|j        	          }|j        |_        |j        |_        d
|_        t          j        d|j        |j        |j                   t          |t          j                  rT|
                    d|j        	          }|j        |_        |j        |_        t          j        d|j        |j                   |j        j        d         j        }	t)          ||	z            }
|
dz  dz  }|dk    rt+          d          d}d}|r|                    d          D ]<}||k    r n3|                    |          D ]}|                    |           |dz  }=|                                D ]}|                    |           t          j        d||           |r|                    d           |                    d          D ]A}|j        |k    r n3|                    |          D ]}|                    |           |dz  }B|                                D ]}|                    |           t          j        d|           |                                 |                                 |                    d           t9          j        |          S # t<          $ rQ}||                                 ||                                 t?          dtA          |                     |d}~ww xY w)aQ  
    Returns a new VideoInput object trimmed from the beginning to the specified duration,
    using av to avoid loading entire video into memory.

    Args:
        video: Input video to trim
        duration_sec: Duration in seconds to keep from the beginning

    Returns:
        VideoFromFile object that owns the output buffer
    Nrr   r   r   r   zFound stream: type=%s, class=%sh264r   yuv420pz!Added video stream: %sx%s @ %sfpsr   z%Added audio stream: %sHz, %s channelsr      z7Video too short: need at least 16 frames for Moonvalleyr   r	   z$Encoded %s video frames (target: %s)r   zEncoded %s audio frameszFailed to trim video: )!r   get_stream_sourcer   r   streamslogginginfotype
isinstanceVideoStreamr   average_rateru   rt   pix_fmtAudioStreamr   r   channelsr   rd   r   rN   r   r   r\   timer   r   VideoFromFile	ExceptionRuntimeErrorstr)r   r   r   input_containerr   input_sourcevideo_streamr   streamfpsestimated_framestarget_framesframe_countaudio_frame_countr   r   es                    r   
trim_videor   6  s,    IIMOYE ..00 ',S9997=s5III %- 	k 	kFL:FKfVVV&".11 k/::6H[:\\%+\"&,m#'0$@&,PVP]_e_rssssFBN33 k/::5vGY:ZZ+1+=(&,m#DfFXZ`Zijjj %+A.;|c122)R/25AVWWW  	](//a/88 ! !-//E +11%88 1 1F$((0000q  '--// - - $$V,,,,L?m\\\  	G  ###(//a/88 ' ':--E +11%88 1 1F$((0000!Q&!! '--// - - $$V,,,,L24EFFF 	    	1&}555 E E E&!!###'""$$$<CFF<<==1DEs   MM 
N4#AN//N4
max_pixelsc                 z    |                                  \  }}t          |||          }|| S t          | |          S )a  Downscale a video to fit within ``max_pixels`` (w * h), preserving aspect ratio.

    Returns the original video object untouched when it already fits. Preserves frame rate, duration, and audio.
    Aspect ratio is preserved up to a fraction of a percent (even-dim rounding).
    )get_dimensionsrj   _apply_video_scale)r   r   r^   r_   
scale_dimss        r   downscale_video_to_max_pixelsr     sE     ''))LE5(zBBJeZ000r   c                     | |z  }||k    rdS t          j        ||z            }t          j        | |z            }t          j        ||z            }|dz  r|dz  }|dz  r|dz  }||fS )a_  Return upscaled (w, h) with even dims meeting at least ``total_pixels``, or None if already large enough.

    Source aspect ratio is preserved; output may drift by a fraction of a percent because both dimensions
    are rounded up to even values (many codecs require divisible-by-2). The result is guaranteed to be at
    least ``total_pixels``.
    Nr"   r	   )ra   rb   ceilre   s          r   _compute_upscale_dimsr     s     U]FtIlV+,,EIeem$$EIeem$$Eqy 
qy 
%<r   
min_pixelsc                 z    |                                  \  }}t          |||          }|| S t          | |          S )a  Upscale a video to meet at least ``min_pixels`` (w * h), preserving aspect ratio.

    Returns the original video object untouched when it already meets the minimum. Preserves frame rate,
    duration, and audio. Aspect ratio is preserved up to a fraction of a percent (even-dim rounding).
    Note: upscaling a low-resolution source does not add real detail; downstream model quality may suffer.
    )r   r   r   )r   r   r^   r_   r   s        r   upscale_video_to_min_pixelsr     sE     ''))LE5&ueZ@@JeZ000r   r   c                    |\  }}t                      }d}d}|                                 \  }}t          |p|          }		 |                                 }
t	          j        |
d          }t	          j        |dd          }|                    d|                                           }||_        ||_	        d	|_
        d}|j        D ]R}t          |t          j                  r6|                    d
|j                  }|j        |_        |j        |_         nS|j        j        d         }|	rt#          ||j        z            nd}|rt#          ||z   |j        z            nd}|r|                    ||           d}|                    d          D ]}|	r"|j        |j        |k     r||j        |k    r n|                    ||d	          }t          j                            |                    d	          d	          }|                    |          D ]}|                    |           |dz  }|                                D ]}|                    |           |dk    rt9          d| d| d          ||                    d           |                    d          D ]]}|	r%|j        |j        |k     r|r|j        ||z   k    r n5d|_        |                    |          D ]}|                    |           ^|                                D ]}|                    |           |                                 |                                 |                    d           t?          j         |          S # tB          $ rQ}||                                 ||                                 tE          dtG          |                     |d}~ww xY w)zNRe-encode ``video`` scaled to ``scale_dims`` with a single decode/encode pass.Nr   r   r   r   r   r   r   r   r   r   )r   r   )ru   rt   rX   rW   r	   z&resize produced no frames (start_time=z, duration=z" selected nothing from the source)r   zFailed to resize video: )$r   get_active_trim_windowboolr   r   r   r   get_frame_rateru   rt   r   r   r   r   r   r   r   rd   	time_baser\   rN   r   reformat
VideoFramer   
to_ndarrayr   r   r   r   r   r   r   r   r   r   )r   r   out_wout_hr   r   r   
start_timedurationtrimmingr   r   r   r   in_video	start_ptsend_ptsencodedr   r   audio_framer   s                         r   r   r     s   LE5IIMO !7799JJ*(++HHG..00',S9997=s5III'226@T@T@V@V2WW"#(%- 	 	F&".11 /::5vGY:ZZ+1+=(&,m#	 #*03<DKC
X%77888!	GOY#zH,0BBCCCUY 	=  8 <<<$++!+44 	 	E 9$	I(=(=&59+?+?ENNuYNOOE M..u/?/?y/?/Q/QZc.ddE&--e44 - - $$V,,,,qLGG"))++ 	) 	)F  ((((a<<4 4 4PX 4 4 4  
 #  ###.55A5>> 	1 	1 "'/;3Cj3P3P  K$4zH7L$L$L"&*11+>> 1 1F$((00001&--// - - $$V,,,,   1&}555 G G G&!!###'""$$$>c!ff>>??QFGs   MN 
O)AO$$O)wavc                     | j         j        r| S | j         t          j        k    r|                                 dz  S | j         t          j        k    r|                                 dz  S t          d| j                    )zOConvert audio to float 32 bits PCM format. Copy-paste from nodes_audio.py file.i   l        zUnsupported wav dtype: )r   is_floating_pointr   int16r   int32r   )r  s    r   _f32_pcmr  (  sq    
y" %
	ek	!	!yy{{e$$	ek	!	!yy{{e$$
:sy::
;
;;r   r   c                     t          j        t          |                     5 }|j        j        st          d          |j        j        d         }t          |j        j                  }|}g }|j	        pd}|
                    |j                  D ]}|                                }t          j        |          }	|	j        dk    r|	                    d          }	n|	j        d         |k    r:|	j        d         |k    r)|	                    dd                                          }	nK|	j        d         |k    r:|	                    d|                                                                          }	|                    |	           	 ddd           n# 1 swxY w Y   |st          d          t          j        |d          }
t1          |
          }
|
                    d                                          |d	S )
z
    Decode any common audio container from bytes using PyAV and return
    a Comfy AUDIO dict: {"waveform": [1, C, T] float32, "sample_rate": int}.
    z"No audio stream found in response.r   r	   )r   r!   NzDecoded zero audio frames.r%   )r   r   )r   r   r   r   r   r   rd   codec_contextr   r   rN   indexr   r   r   r   r   r'   	transposer   r   tappendr)   r  )r   afr   in_srout_srframes
n_channelsr   arrbufr  s              r   audio_bytes_to_audio_inputr  3  s   
 
%%	&	& "z 	CABBB!!$F(455%'_)
YYv|Y44 		 		E""$$C"3''Cx1}}mmA&&1++	"0K0KmmAq))44661++kk"j113355@@BBMM#		              ,  75666
)F
"
"
"C
3--Ca((3355fMMMs   E&FFFnearest-exactrn   TFmaskc                 *   |j         \  }}}}|                     d          } |                     dd          } t          | ||||          } |                     dd          } |s|                     d          } |s| dk                                    } | S )zaResize mask to be the same dimensions as an image, while maintaining proper format for API calls.r!   r	   )ru   rt   upscale_methodcropg      ?)r'   r   r(   r   rA   r   )	r  r   r  r  allow_gradientadd_channel_dim_rt   ru   s	            r   resize_mask_to_imager  V  s      +Avua>>"D<<AD$eF>`deeeD<<2D  ||B $s
!!##Kr   c                 `    |                      d          } t          j        | gdz  d          S )z]Make mask have the expected amount of dims (4) and channels (3) to be recognized as an image.r!   r<   r%   )r   r   r)   )r  s    r   convert_mask_to_imager   k  s.    >>"D9dVaZR((((r   filepathc                     t          | d          5 }|                                }ddd           n# 1 swxY w Y   t          j        |                              d          S )z(Converts a text file to a base64 string.rbNrJ   )r   readrL   rM   rN   )r!  ffile_contents      r   text_filepath_to_base64_stringr'  q  s    	h		  vvxx                             L))00999s   266c                 f    t          |           }t          j        |           \  }}|d}d| d| S )z#Converts a text file to a data URI.Nzapplication/octet-streamr}   r~   )r'  	mimetypes
guess_type)r!  r   r.   r  s       r   text_filepath_to_data_urir+  x  sD    28<<M'11LIq.	5955m555r   )r   )r+   )r+   r,   )r,   )rk   )r   r   )NN)r  rn   TF);rL   r   ra   r)  r5   ior   r   rB   r   r   PILr   comfy.utilsr   comfy_api.latestr   r   r   _helpersr
   r   Tensorr   r*   rd   r:   r3   rR   r4   tuplerj   r@   r{   r   Audior   Videor   r   r   ndarrayr   r   r   r   r   r   r   r   r   r  bytesdictr  r  r   r'  r+   r   r   <module>r9     sc                 				            & & & & & & 4 4 4 4 4 4 4 4 4 4 + + + + + +6 67 6# 65< 6 6 6 6(.u| .U\ .el . . . .(  +'	  < * Tz	
    2
 
 
S4Z 
RWR] 
 
 
 
  + ! !,!*! ! 		! ! ! !.   g    3 s # %PSUXPX/\`J`    "W W%, Wc WTYT` W W W Wel  QVQ]    "  + 6 6,6*6 6 		6 6 6 6&9 9%+ 9 9Z] 9jm 9 9 9 9 59%)G G;G*T1G d"G 		G G G G4 "	 :  	
    > "*    4ek g    .iEek iE iE5; iE iE iE iEX
1 
1# 
1%+ 
1 
1 
1 
1 S  cSVhZ^H^    (1u{ 1 1 1 1 1 1TGek TGuS#X TG5; TG TG TG TGn<%, <5< < < < < NE  Nd  N  N  N  NL #	 
,<   *) ) ) ) ) ):S :S : : : :6 6 6 6 6 6 6 6r   