
    
3j                        S SK Jr  S SKJr  S SKJr  S SKrS SKr	S SK
r
S SKJr  SSKJrJr  \" \5      r\" 5       r\(       a  S SKrO\" S5      eS	\R(                  R*                  S
\S\R.                  R0                  4S jrS	\R(                  R*                  S\R.                  R0                  S\R4                  SS4S jrS	\R(                  R*                  S\R.                  R0                  S\
R8                  S
\SS4
S jr SS\\	R>                  R>                     \R@                  -  \
R8                  -  \\
R8                     -  S\S\
R8                  S
\S\!S\SS4S jjr"g)    )Iterator)Fraction)chainN)tqdm   )
get_loggeris_av_availablez`PyAV is required to use LTX 2.0 video export utilities. You can install it with `pip install av`	containeraudio_sample_ratereturnc                     U R                  SUS9nXR                  l        SUR                  l        [	        SU5      UR                  l        U$ )z'
Prepare the audio stream for writing.
aacratestereo   )
add_streamcodec_contextsample_ratelayoutr   	time_base)r
   r   audio_streams      _/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/ltx2/export_utils.py_prepare_audio_streamr   (   sP     ''4E'FL->*(0L%+3A7H+IL(    r   frame_inc                 (   UR                   nUR                  =(       d    SnUR                  =(       d    SnUR                  =(       d    UR                  n[        R
                  R                  R                  UUUS9nSnUR                  U5       HU  n	U	R                  c  Xl	        XR                  -  nUR                  U	l        U R                  UR                  U	5      5        MW     UR                  5        H  n
U R                  U
5        M     g )Nfltpr   )formatr   r   r   )r   r   r   r   avaudio	resamplerAudioResamplerresampleptssamplesmuxencode)r
   r   r   cctarget_formattarget_layouttarget_rateaudio_resampleraudio_next_ptsrframepackets              r   _resample_audior1   3   s     
	#	#B II'MII)M..8H$8$8Khh((77 8 O N!**84::'J..(%11l))&12 5 %%'f (r   r&   c                 x   UR                   S:X  a	  US S 2S 4   nUR                  S   S:w  a  UR                  S   S:X  a  UR                  nUR                  S   S:w  a  [        SUR                   S35      eUR                  [
        R                  :w  a:  [
        R                  " USS5      nUS-  R                  [
        R                  5      n[        R                  R                  UR                  5       R                  SS	5      R                  5       R                  5       S
SS9nX4l        [#        XU5        g )Nr      r   z,Expected samples with 2 channels; got shape .g      g      ?g    @s16r   )r   r   )ndimshapeT
ValueErrordtypetorchint16cliptor    
AudioFramefrom_ndarray
contiguousreshapecpunumpyr   r1   )r
   r   r&   r   r   s        r   _write_audiorF   P   s    ||q!T'"}}Q1q!1Q!6))}}Q1GVWXYY }}#**WdC0W$((5}}))$$Q+//1779 * H
 -IX6r   videofpsr!   output_pathvideo_chunks_numberc                    [        U [        5      (       a~  [        U S   [        R                  R                  5      (       aR  U  Vs/ s H  n[        R
                  " U5      PM     nn[        R                  " USS9n [        R                  " U 5      n O[        U [        R                  5      (       a  [        R                  " [        R                  " U 5      U :*  U [        R                  " U 5      :*  5      n[        R                  " U5      (       a#  U S-  R                  5       R                  S5      n O[         R#                  S5        [        R                  " U 5      n [        U [        R$                  5      (       a   [        R&                  " XSS9n [)        U 5      n [+        U 5      n	U	R,                  u  pp[.        R0                  " USS9nUR3                  S	[5        U5      S
9nXl        Xl        SUl        Ub  Uc  [=        S5      e[?        X5      n[A        [C        U	/U 5      USS9 Hr  nURE                  S5      RG                  5       nU HJ  n[.        RH                  RK                  USS9nURM                  U5       H  nURO                  U5        M     ML     Mt     URM                  5        H  nURO                  U5        M     Ub  [Q        UWX#5        URS                  5         gs  snf )a  
Encodes a video with audio using the PyAV library. Based on code from the original LTX-2 repo:
https://github.com/Lightricks/LTX-2/blob/4f410820b198e05074a1e92de793e3b59e9ab5a0/packages/ltx-pipelines/src/ltx_pipelines/utils/media_io.py#L182

Args:
    video (`List[PIL.Image.Image]` or `np.ndarray` or `torch.Tensor`):
        A video tensor of shape [frames, height, width, channels] with integer pixel values in [0, 255]. If the
        input is a `np.ndarray`, it is expected to be a float array with values in [0, 1] (which is what pipelines
        usually return with `output_type="np"`).
    fps (`int`)
        The frames per second (FPS) of the encoded video.
    audio (`torch.Tensor`, *optional*):
        An audio waveform of shape [audio_channels, samples].
    audio_sample_rate: (`int`, *optional*):
        The sampling rate of the audio waveform. For LTX 2, this is typically 24000 (24 kHz).
    output_path (`str`):
        The path to save the encoded video to.
    video_chunks_number (`int`, *optional*, defaults to `1`):
        The number of chunks to split the video into for encoding. Each chunk will be encoded separately. The
        number of chunks to use often depends on the tiling config for the video VAE.
r   )axis   uint8zSupplied `numpy.ndarray` does not have values in [0, 1]. The values will be assumed to be pixel values in [0, ..., 255] and will be used as is.)dimw)modelibx264r   yuv420pNz4audio_sample_rate is required when audio is providedzEncoding video chunks)totaldescrD   rgb24)r   )*
isinstancelistPILImagenparraystackr<   
from_numpyndarraylogical_and
zeros_like	ones_likeallroundastypeloggerwarningTensortensor_splititernextr8   r    openr   intwidthheightpix_fmtr:   r   r   r   r?   rE   
VideoFramerA   r(   r'   rF   close)rG   rH   r!   r   rI   rJ   framevideo_framesis_denormalizedfirst_chunk_ro   rn   r
   streamr   video_chunkvideo_chunk_cpuframe_arrayr0   s                       r   encode_videor|   n   sQ   : %:eAh		#H#H5:;UEU;A.  '	E2::	&	&..u)=)FQSQ]Q]^cQdHde66/""S['')009ENNB   '%&&""51EUu+K%++Au#.I!!)#c(!;FLMFN$STT,YJE;-7?RYpq%../557*KMM..{7.KE --.f% / + r --/f " YeGOOe <s    K,)r   )#collections.abcr   	fractionsr   	itertoolsr   rE   r[   	PIL.ImagerY   r<   r   utilsr   r	   __name__rf   _CAN_USE_AVr    ImportErrorr
   	Containerrm   r!   AudioStreamr   r@   r1   rh   rF   rX   rZ   r_   strr|    r   r   <module>r      s    %       0 
H	 
j 
R\\%;%; PS XZX`X`XlXl ||%%57XX5I5IUWUbUb	:7||%%7((&&7 \\7 	7
 
7H  !Q		 2::-<x?UUQ	Q <<Q 	Q
 Q Q 
Qr   