
    
3j]                        S SK r SSKJr  SSKJrJrJr  SSKJrJ	r	J
r
  SSKJrJrJrJrJrJrJrJr  SS	KJr  SS
KJrJr  SSKJrJrJr  \R:                  " \5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r# " S S\5      r$\	" S\" 5       4S\ " 5       4S\$" 5       4S\" 5       4/5      r% " S S\5      r&g)    N   )logging   )AutoPipelineBlocksConditionalPipelineBlocksSequentialPipelineBlocks)
InputParamInsertableDictOutputParam   )HeliosAdditionalInputsStep HeliosAddNoiseToImageLatentsStep HeliosAddNoiseToVideoLatentsStepHeliosI2VSeedHistoryStepHeliosPrepareHistoryStepHeliosSetTimestepsStepHeliosTextInputStepHeliosV2VSeedHistoryStep)HeliosDecodeStep)HeliosChunkDenoiseStepHeliosI2VChunkDenoiseStep)HeliosImageVaeEncoderStepHeliosTextEncoderStepHeliosVideoVaeEncoderStepc                   @    \ rS rSrSr\\/rSS/rSS/r	\
S 5       rSrg	)
HeliosAutoVaeEncoderStep,   a  
Encoder step that encodes video or image inputs. This is an auto pipeline block.
   - `HeliosVideoVaeEncoderStep` (video_encoder) is used when `video` is provided.
   - `HeliosImageVaeEncoderStep` (image_encoder) is used when `image` is provided.
   - If neither is provided, step will be skipped.

  Components:
      vae (`AutoencoderKLWan`) video_processor (`VideoProcessor`)

  Inputs:
      video (`None`, *optional*):
          Input video for video-to-video generation
      height (`int`, *optional*, defaults to 384):
          The height in pixels of the generated image.
      width (`int`, *optional*, defaults to 640):
          The width in pixels of the generated image.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      image (`Image | list`, *optional*):
          Reference image(s) for denoising. Can be a single image or list of images.

  Outputs:
      image_latents (`Tensor`):
          The latent representation of the input image.
      video_latents (`Tensor`):
          Encoded video latents (chunked)
      fake_image_latents (`Tensor`):
          Fake image latents for history seeding
video_encoderimage_encodervideoimagec                      g)Na#  Encoder step that encodes video or image inputs. This is an auto pipeline block.
 - `HeliosVideoVaeEncoderStep` (video_encoder) is used when `video` is provided.
 - `HeliosImageVaeEncoderStep` (image_encoder) is used when `image` is provided.
 - If neither is provided, step will be skipped. selfs    r/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/helios/modular_blocks_helios.pydescription$HeliosAutoVaeEncoderStep.descriptionQ   s    ?	
    r#   N)__name__
__module____qualname____firstlineno____doc__r   r   block_classesblock_namesblock_trigger_inputspropertyr'   __static_attributes__r#   r)   r&   r   r   ,   s<    @ /0IJM"O4K#W-
 
r)   r   c                   P    \ rS rSrSrSr\\\\	/r
/ SQr\S 5       r\S 5       rSrg)	HeliosCoreDenoiseStepb   az  
Denoise block that takes encoded conditions and runs the chunk-based denoising process.

  Components:
      transformer (`HeliosTransformer3DModel`) scheduler (`HeliosScheduler`) guider (`ClassifierFreeGuidance`)

  Inputs:
      num_videos_per_prompt (`int`, *optional*, defaults to 1):
          Number of videos to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      height (`int`, *optional*, defaults to 384):
          The height in pixels of the generated image.
      width (`int`, *optional*, defaults to 640):
          The width in pixels of the generated image.
      num_frames (`int`, *optional*, defaults to 132):
          Total number of video frames to generate.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      history_sizes (`list`, *optional*, defaults to [16, 2, 1]):
          Sizes of long/mid/short history buffers for temporal context.
      keep_first_frame (`bool`, *optional*, defaults to True):
          Whether to keep the first frame as a prefix in history.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      timesteps (`Tensor`, *optional*):
          Timesteps for the denoising process.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.

  Outputs:
      latent_chunks (`list`):
          List of per-chunk denoised latent tensors
helios)inputprepare_historyset_timestepschunk_denoisec                     g)NzWDenoise block that takes encoded conditions and runs the chunk-based denoising process.r#   r$   s    r&   r'   !HeliosCoreDenoiseStep.description   s    hr)   c                 "    [        S[        SS9/$ Nlatent_chunksz)List of per-chunk denoised latent tensors	type_hintr'   r   listr$   s    r&   outputsHeliosCoreDenoiseStep.outputs       OtItuvvr)   r#   N)r*   r+   r,   r-   r.   
model_namer   r   r   r   r/   r0   r2   r'   rE   r3   r#   r)   r&   r5   r5   b   sT    +Z J 	M QKi i w wr)   r5   c            
           \ rS rSrSrSr\\" \R                  " S5      /\" S\
R                  SS9/S9\\\\\/r/ S	Qr\S
 5       r\S 5       rSrg)HeliosI2VCoreDenoiseStep   a
  
I2V denoise block that seeds history with image latents and uses I2V-aware chunk preparation.

  Components:
      transformer (`HeliosTransformer3DModel`) scheduler (`HeliosScheduler`) guider (`ClassifierFreeGuidance`)

  Inputs:
      num_videos_per_prompt (`int`, *optional*, defaults to 1):
          Number of videos to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      fake_image_latents (`Tensor`, *optional*):
          Fake image latents used as history seed for I2V generation.
      image_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for image latent noise.
      image_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for image latent noise.
      video_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for video/fake-image latent noise.
      video_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for video/fake-image latent noise.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_frames (`int`, *optional*, defaults to 132):
          Total number of video frames to generate.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      history_sizes (`list`, *optional*, defaults to [16, 2, 1]):
          Sizes of long/mid/short history buffers for temporal context.
      keep_first_frame (`bool`, *optional*, defaults to True):
          Whether to keep the first frame as a prefix in history.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      timesteps (`Tensor`, *optional*):
          Timesteps for the denoising process.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.

  Outputs:
      latent_chunks (`list`):
          List of per-chunk denoised latent tensors
r7   image_latentsfake_image_latentsz;Fake image latents used as history seed for I2V generation.rA   image_latent_inputsadditional_batch_inputs)r8   additional_inputsadd_noise_imager9   seed_historyr:   r;   c                     g)Nz]I2V denoise block that seeds history with image latents and uses I2V-aware chunk preparation.r#   r$   s    r&   r'   $HeliosI2VCoreDenoiseStep.description       nr)   c                 "    [        S[        SS9/$ r?   rC   r$   s    r&   rE    HeliosI2VCoreDenoiseStep.outputs   rG   r)   r#   N)r*   r+   r,   r-   r.   rH   r   r   r	   templatetorchTensorr   r   r   r   r   r/   r0   r2   r'   rE   r3   r#   r)   r&   rJ   rJ      s    3j J"!+!4!4_!E F(#ll ]%		
 	)  !!M$K o o w wr)   rJ   c            
           \ rS rSrSrSr\\" \R                  " S5      /\" S\
R                  SS9/S9\\\\\/r/ S	Qr\S
 5       r\S 5       rSrg)HeliosV2VCoreDenoiseStepi  av
  
V2V denoise block that seeds history with video latents and uses I2V-aware chunk preparation.

  Components:
      transformer (`HeliosTransformer3DModel`) scheduler (`HeliosScheduler`) guider (`ClassifierFreeGuidance`)

  Inputs:
      num_videos_per_prompt (`int`, *optional*, defaults to 1):
          Number of videos to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      video_latents (`Tensor`, *optional*):
          Encoded video latents for V2V generation.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      image_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for image latent noise.
      image_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for image latent noise.
      video_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for video latent noise.
      video_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for video latent noise.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_frames (`int`, *optional*, defaults to 132):
          Total number of video frames to generate.
      history_sizes (`list`, *optional*, defaults to [16, 2, 1]):
          Sizes of long/mid/short history buffers for temporal context.
      keep_first_frame (`bool`, *optional*, defaults to True):
          Whether to keep the first frame as a prefix in history.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      timesteps (`Tensor`, *optional*):
          Timesteps for the denoising process.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.

  Outputs:
      latent_chunks (`list`):
          List of per-chunk denoised latent tensors
r7   rL   video_latentsz)Encoded video latents for V2V generation.rA   rN   )r8   rQ   add_noise_videor9   rS   r:   r;   c                     g)Nz]V2V denoise block that seeds history with video latents and uses I2V-aware chunk preparation.r#   r$   s    r&   r'   $HeliosV2VCoreDenoiseStep.descriptionS  rV   r)   c                 "    [        S[        SS9/$ r?   rC   r$   s    r&   rE    HeliosV2VCoreDenoiseStep.outputsW  rG   r)   r#   N)r*   r+   r,   r-   r.   rH   r   r   r	   rY   rZ   r[   r   r   r   r   r   r/   r0   r2   r'   rE   r3   r#   r)   r&   r]   r]     s    3j J"!+!4!4_!E F#u||It%	
 	)  !M K o o w wr)   r]   c                   P    \ rS rSrSr\\\/r/ SQr	SS/r
SrSS jr\S	 5       rS
rg)HeliosAutoCoreDenoiseStepi^  aD  
Core denoise step that selects the appropriate denoising block.
   - `HeliosV2VCoreDenoiseStep` (video2video) for video-to-video tasks.
   - `HeliosI2VCoreDenoiseStep` (image2video) for image-to-video tasks.
   - `HeliosCoreDenoiseStep` (text2video) for text-to-video tasks.

  Components:
      transformer (`HeliosTransformer3DModel`) scheduler (`HeliosScheduler`) guider (`ClassifierFreeGuidance`)

  Inputs:
      num_videos_per_prompt (`int`, *optional*, defaults to 1):
          Number of videos to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      video_latents (`Tensor`, *optional*):
          Encoded video latents for V2V generation.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      image_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for image latent noise.
      image_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for image latent noise.
      video_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for video latent noise.
      video_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for video latent noise.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_frames (`int`, *optional*, defaults to 132):
          Total number of video frames to generate.
      history_sizes (`list`):
          Sizes of long/mid/short history buffers for temporal context.
      keep_first_frame (`bool`, *optional*, defaults to True):
          Whether to keep the first frame as a prefix in history.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`):
          Custom sigmas for the denoising process.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      timesteps (`Tensor`, *optional*):
          Timesteps for the denoising process.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      fake_image_latents (`Tensor`, *optional*):
          Fake image latents used as history seed for I2V generation.
      height (`int`, *optional*, defaults to 384):
          The height in pixels of the generated image.
      width (`int`, *optional*, defaults to 640):
          The width in pixels of the generated image.

  Outputs:
      latent_chunks (`list`):
          List of per-chunk denoised latent tensors
)video2videoimage2video
text2videor^   rM   rh   Nc                     Ub  gUb  gg )Nrf   rg   r#   )r%   r^   rM   s      r&   select_block&HeliosAutoCoreDenoiseStep.select_block  s    $ + r)   c                      g)Na  Core denoise step that selects the appropriate denoising block.
 - `HeliosV2VCoreDenoiseStep` (video2video) for video-to-video tasks.
 - `HeliosI2VCoreDenoiseStep` (image2video) for image-to-video tasks.
 - `HeliosCoreDenoiseStep` (text2video) for text-to-video tasks.r#   r$   s    r&   r'   %HeliosAutoCoreDenoiseStep.description  s    O	
r)   r#   )NN)r*   r+   r,   r-   r.   r]   rJ   r5   r/   r0   r1   default_block_namerj   r2   r'   r3   r#   r)   r&   re   re   ^  sH    <| ./GI^_M>K+-AB% 
 
r)   re   text_encodervae_encoderdenoisedecodec                       \ rS rSrSrSr\R                  5       r\R                  5       r
SS0SSS.SSS.S.r\S	 5       r\S
 5       rSrg)HeliosAutoBlocksi  a  
Auto Modular pipeline for text-to-video, image-to-video, and video-to-video tasks using Helios.

  Supported workflows:
    - `text2video`: requires `prompt`
    - `image2video`: requires `prompt`, `image`
    - `video2video`: requires `prompt`, `video`

  Components:
      text_encoder (`UMT5EncoderModel`) tokenizer (`AutoTokenizer`) guider (`ClassifierFreeGuidance`) vae
      (`AutoencoderKLWan`) video_processor (`VideoProcessor`) transformer (`HeliosTransformer3DModel`) scheduler
      (`HeliosScheduler`)

  Inputs:
      prompt (`str`):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      max_sequence_length (`int`, *optional*, defaults to 512):
          Maximum sequence length for prompt encoding.
      video (`None`, *optional*):
          Input video for video-to-video generation
      height (`int`, *optional*, defaults to 384):
          The height in pixels of the generated image.
      width (`int`, *optional*, defaults to 640):
          The width in pixels of the generated image.
      num_latent_frames_per_chunk (`int`, *optional*, defaults to 9):
          Number of latent frames per temporal chunk.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      image (`Image | list`, *optional*):
          Reference image(s) for denoising. Can be a single image or list of images.
      num_videos_per_prompt (`int`, *optional*, defaults to 1):
          Number of videos to generate per prompt.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      video_latents (`Tensor`, *optional*):
          Encoded video latents for V2V generation.
      image_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for image latent noise.
      image_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for image latent noise.
      video_noise_sigma_min (`float`, *optional*, defaults to 0.111):
          Minimum sigma for video latent noise.
      video_noise_sigma_max (`float`, *optional*, defaults to 0.135):
          Maximum sigma for video latent noise.
      num_frames (`int`, *optional*, defaults to 132):
          Total number of video frames to generate.
      history_sizes (`list`):
          Sizes of long/mid/short history buffers for temporal context.
      keep_first_frame (`bool`, *optional*, defaults to True):
          Whether to keep the first frame as a prefix in history.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`):
          Custom sigmas for the denoising process.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      timesteps (`Tensor`, *optional*):
          Timesteps for the denoising process.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      fake_image_latents (`Tensor`, *optional*):
          Fake image latents used as history seed for I2V generation.
      output_type (`str`, *optional*, defaults to np):
          Output format: 'pil', 'np', 'pt'.

  Outputs:
      videos (`list`):
          The generated videos.
r7   promptT)ru   r!   )ru   r    )rh   rg   rf   c                     g)Nz_Auto Modular pipeline for text-to-video, image-to-video, and video-to-video tasks using Helios.r#   r$   s    r&   r'   HeliosAutoBlocks.description  s    pr)   c                 0    [         R                  " S5      /$ )Nvideos)r   rY   r$   s    r&   rE   HeliosAutoBlocks.outputs  s    $$X.//r)   r#   N)r*   r+   r,   r-   r.   rH   AUTO_BLOCKSvaluesr/   keysr0   _workflow_mapr2   r'   rE   r3   r#   r)   r&   rt   rt     sw    HT J&&(M""$K  &"&6"&6M q q 0 0r)   rt   )'rZ   utilsr   modular_pipeliner   r   r   modular_pipeline_utilsr	   r
   r   before_denoiser   r   r   r   r   r   r   r   decodersr   rq   r   r   encodersr   r   r   
get_loggerr*   loggerr   r5   rJ   r]   re   r{   rt   r#   r)   r&   <module>r      s      f f L L	 	 	 ' F a a 
		H	%,
1 ,
l=w4 =wDYw7 Yw|Ww7 WwxR
 9 R
j 	.01	023	-/0	#%&	\0/ \0r)   