
    
3j                     >   S SK r SSKJr  SSKJrJr  SSKJrJrJ	r	  SSKJ
r
JrJr   SS	\S
\ R                  S\S\S\ R                  4
S jjrS\ R                  S\S\\\4   4S jr " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      rg)    N   )QwenImageMultiControlNetModel   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )QwenImageLayeredPachifierQwenImageModularPipelineQwenImagePachifier
input_nameinput_tensor
batch_sizenum_images_per_promptreturnc           	          [        U[        R                  5      (       d  [        SU  S35      eUR                  S   S:X  a  X#-  nO7UR                  S   U:X  a  UnO![        SU  SU SUR                  S    35      eUR                  USS9nU$ )a  Repeat tensor elements to match the final batch size.

This function expands a tensor's batch dimension to match the final batch size (batch_size * num_images_per_prompt)
by repeating each element along dimension 0.

The input tensor must have batch size 1 or batch_size. The function will:
- If batch size is 1: repeat each element (batch_size * num_images_per_prompt) times
- If batch size equals batch_size: repeat each element num_images_per_prompt times

Args:
    input_name (str): Name of the input tensor (used for error messages)
    input_tensor (torch.Tensor): The tensor to repeat. Must have batch size 1 or batch_size.
    batch_size (int): The base batch size (number of prompts)
    num_images_per_prompt (int, optional): Number of images to generate per prompt. Defaults to 1.

Returns:
    torch.Tensor: The repeated tensor with final batch size (batch_size * num_images_per_prompt)

Raises:
    ValueError: If input_tensor is not a torch.Tensor or has invalid batch size

Examples:
    tensor = torch.tensor([[1, 2, 3]]) # shape: [1, 3] repeated = repeat_tensor_to_batch_size("image", tensor,
    batch_size=2, num_images_per_prompt=2) repeated # tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) - shape:
    [4, 3]

    tensor = torch.tensor([[1, 2, 3], [4, 5, 6]]) # shape: [2, 3] repeated = repeat_tensor_to_batch_size("image",
    tensor, batch_size=2, num_images_per_prompt=2) repeated # tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [4, 5, 6]])
    - shape: [4, 3]
`z` must be a tensorr   r   z!` must have have batch size 1 or z
, but got dim)
isinstancetorchTensor
ValueErrorshaperepeat_interleave)r   r   r   r   	repeat_bys        f/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/qwenimage/inputs.pyrepeat_tensor_to_batch_sizer       s    J lELL111ZL(:;<< !!6				A	*	,)	
|<ZL
S_SeSefgShRij
 	

  11)1CL    latentsvae_scale_factorc                     U R                   S:w  a(  U R                   S:w  a  [        SU R                    35      eU R                  SS u  p#X!-  nX1-  nXE4$ )a  Calculate image dimensions from latent tensor dimensions.

This function converts latent space dimensions to image space dimensions by multiplying the latent height and width
by the VAE scale factor.

Args:
    latents (torch.Tensor): The latent tensor. Must have 4 or 5 dimensions.
        Expected shapes: [batch, channels, height, width] or [batch, channels, frames, height, width]
    vae_scale_factor (int): The scale factor used by the VAE to compress images.
        Typically 8 for most VAEs (image is 8x larger than latents in each dimension)

Returns:
    tuple[int, int]: The calculated image dimensions as (height, width)

Raises:
    ValueError: If latents tensor doesn't have 4 or 5 dimensions

      z6unpacked latents must have 4 or 5 dimensions, but got N)ndimr   r   )r"   r#   latent_heightlatent_widthheightwidths         r    calculate_dimension_from_latentsr-   P   sa    ( ||qW\\Q.QRYR^R^Q_`aa")--"4M-F+E=r!   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S 5       rS	\S
\S\4S jrSrg)QwenImageTextInputsStepp   ah  
Text input processing step that standardizes text embeddings for the pipeline.
  This step:
    1. Determines `batch_size` and `dtype` based on `prompt_embeds`
    2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt)

  This block should be placed after all encoder steps to process the text embeddings before they are used in
  subsequent pipeline steps.

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.

  Outputs:
      batch_size (`int`):
          The batch size of the prompt embeddings
      dtype (`dtype`):
          The data type of the prompt embeddings
      prompt_embeds (`Tensor`):
          The prompt embeddings. (batch-expanded)
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask. (batch-expanded)
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings. (batch-expanded)
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask. (batch-expanded)
	qwenimager   c                     SnSnX-   $ )NzText input processing step that standardizes text embeddings for the pipeline.
This step:
  1. Determines `batch_size` and `dtype` based on `prompt_embeds`
  2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt)z

This block should be placed after all encoder steps to process the text embeddings before they are used in subsequent pipeline steps. )selfsummary_sectionplacement_sections      r   description#QwenImageTextInputsStep.description   s    p 	 h22r!   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )Nr   prompt_embedsprompt_embeds_masknegative_prompt_embedsnegative_prompt_embeds_mask)r	   templater4   s    r   inputsQwenImageTextInputsStep.inputs   sX      780 45 89 =>
 	
r!   c           
          [        S[        SS9[        S[        R                  SS9[         R                  " SSS9[         R                  " S	SS9[         R                  " S
SS9[         R                  " SSS9/$ )Nr   z'The batch size of the prompt embeddingsname	type_hintr7   dtypez&The data type of the prompt embeddingsr:   zbatch-expanded)noter;   r<   r=   )r
   intr   rF   r>   r?   s    r   intermediate_outputs,QwenImageTextInputsStep.intermediate_outputs   ss     \SFopWIqr  7GH  !5<LM  !9@PQ  !>EUV
 	
r!   c                 X   Ub  Uc  [        S5      eUc  Ub  [        S5      eUR                  S   U R                  S   :w  a  [        S5      eUb+  UR                  S   U R                  S   :w  a  [        S5      eUb,  UR                  S   U R                  S   :w  a  [        S5      eg g )NzS`negative_prompt_embeds_mask` is required when `negative_prompt_embeds` is not NonezJcannot pass `negative_prompt_embeds_mask` without `negative_prompt_embeds`r   zE`prompt_embeds_mask` must have the same batch size as `prompt_embeds`zI`negative_prompt_embeds` must have the same batch size as `prompt_embeds`zN`negative_prompt_embeds_mask` must have the same batch size as `prompt_embeds`)r   r   r:   r;   r<   r=   s       r   check_inputs$QwenImageTextInputsStep.check_inputs   s     "-2M2Urss!).I.Uijj##A&-*=*=a*@@dee#/4J4P4PQR4SWdWjWjklWm4mhii (38S8Y8YZ[8\`m`s`stu`v8vmnn 9w3r!   
componentsstatec                    U R                  U5      nU R                  UR                  UR                  UR                  UR
                  S9  UR                  R                  S   Ul        UR                  R                  Ul        UR                  R                  u  pEnUR                  R                  SUR                  S5      Ul        UR                  R                  UR                  UR                  -  US5      Ul        UR                  R                  SUR                  S5      Ul        UR                  R                  UR                  UR                  -  U5      Ul        UR                  b  UR                  R                  u  pEnUR                  R                  SUR                  S5      Ul        UR                  R                  UR                  UR                  -  US5      Ul        UR
                  R                  SUR                  S5      Ul        UR
                  R                  UR                  UR                  -  U5      Ul        U R                  X#5        X4$ )NrL   r   r   )get_block_staterM   r:   r;   r<   r=   r   r   rF   repeatr   viewset_block_state)r4   rO   rP   block_state_seq_lens         r   __call__ QwenImageTextInputsStep.__call__   s*   **51%33*==#.#E#E(3(O(O	 	 	
 "-!:!:!@!@!C'55;;#1177A$/$=$=$D$DQHiHikl$m!$/$=$=$B$B""[%F%FFQS%
! *5)G)G)N)NqR]RsRsuv)w&)4)G)G)L)L""[%F%FF*
& --9'>>DDMA1<1S1S1Z1Z;44a2K. 2=1S1S1X1X&&)J)JJGUW2K. 7B6]6]6d6d;44a7K3 7B6]6]6b6b&&)J)JJG7K3 	U0  r!   r3   N)__name__
__module____qualname____firstlineno____doc__
model_namepropertystrr7   listr	   r@   r
   rI   staticmethodrM   r   r   rZ   __static_attributes__r3   r!   r   r/   r/   p   s    "H J3S 3 3 
Z( 
 
 
d;&7 
 
 o o.+!#; +!M +!Vc +!r!   r/   c                      ^  \ rS rSrSrSr  SS\\   S-  S\\   S-  4U 4S jjjr\	S\
4S	 j5       r\	S\\   4S
 j5       r\	S\\   4S j5       r\	S\\   4S j5       rS\S\S\4S jrSrU =r$ )QwenImageAdditionalInputsStepi  a1  
Input processing step that:
    1. For image latent inputs: Updates height/width if None, patchifies, and expands batch size
    2. For additional batch inputs: Expands batch dimensions to match final batch size

  Configured inputs:
    - Image latent inputs: ['image_latents']

  This block should be placed after the encoder steps and the text input step.

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      image_height (`int`):
          The image height calculated from the image latents dimension
      image_width (`int`):
          The image width calculated from the image latents dimension
      height (`int`):
          if not provided, updated to image height
      width (`int`):
          if not provided, updated to image width
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified and
          batch-expanded)
r1   Nimage_latent_inputsadditional_batch_inputsc                   > Uc  [         R                  " S5      /nUc  / n[        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   [        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   Xl        X l        [        TU ]%  5         g Nimage_latentsz,image_latent_inputs must be a list, but got z:image_latent_inputs must be a list of InputParam, but got z0additional_batch_inputs must be a list, but got z>additional_batch_inputs must be a list of InputParam, but got 
r	   r>   r   rd   r   type_image_latent_inputs_additional_batch_inputssuper__init__r4   ri   rj   input_param	__class__s       r   rs   &QwenImageAdditionalInputsStep.__init__,  s     &#-#6#6#G"H"*&(#-t44KDQdLeKfghh2!+z::$'abfgrbsat%uvv  3 1488OPTUlPmOnopp6!+z::$XY]^iYjXkl   7 %8!(?%r!   r   c                 l   SnSnU R                   (       d  U R                  (       a|  SnU R                   (       a,  USU R                    Vs/ s H  o3R                  PM     sn 3-  nU R                  (       a,  USU R                   Vs/ s H  o3R                  PM     sn 3-  nSnX-   U-   $ s  snf s  snf )NzInput processing step that:
  1. For image latent inputs: Updates height/width if None, patchifies, and expands batch size
  2. For additional batch inputs: Expands batch dimensions to match final batch size 

Configured inputs:
  - Image latent inputs: 
  - Additional batch inputs: N

This block should be placed after the encoder steps and the text input step.rp   rq   rD   r4   r5   inputs_infopr6   s        r   r7   )QwenImageAdditionalInputsStep.descriptionK      c 	 $$(E(E2K((!<dNgNg=hNgffNg=h<ijj,,!@RVRoRoApRoQ&&RoAp@qrrn,/@@@ >iAp   
B,B1c                 "    [        S[        SS9/$ N	pachifierfrom_config)default_creation_methodr   r   r?   s    r   expected_components1QwenImageAdditionalInputsStep.expected_components_       +'9S`a
 	
r!   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /nXR                  U R                  -   -  nU$ Nr   r   r+   r,   r	   r>   rp   rq   r4   r@   s     r   r@   $QwenImageAdditionalInputsStep.inputse  sd      78-)(	
 	++d.K.KKKr!   c           	      $   [        S[        SS9[        S[        SS9/n[        U R                  5      S:  a<  UR	                  [        S[        SS95        UR	                  [        S	[        S
S95        U R                   H>  nUR	                  [        UR
                  UR                  UR                  S-   S95        M@     U R                   H>  nUR	                  [        UR
                  UR                  UR                  S-   S95        M@     U$ )Nimage_height<The image height calculated from the image latents dimensionrC   image_width;The image width calculated from the image latents dimensionr   r+   (if not provided, updated to image heightr,   'if not provided, updated to image widthz  (patchified and batch-expanded) (batch-expanded)	r
   rH   lenrp   appendrD   rE   r7   rq   r4   outputsru   s      r   rI   2QwenImageAdditionalInputsStep.intermediate_outputsr  s    #Z
 "Y
 t(()A-NNSFpq NNCEno
  44KNN$)))33 + 7 7:\ \ 5  88KNN$)))33 + 7 7:M M 9 r!   rO   rP   c                    U R                  U5      nU R                   H  nUR                  n[        X55      nUc  M  [	        XaR
                  5      u  pxUR                  =(       d    UUl        UR                  =(       d    UUl        [        US5      (       d  Xsl	        [        US5      (       d  Xl
        UR                  R                  U5      n[        UUUR                  UR                  S9n[!        X5U5        M     U R"                   HK  nUR                  n	[        X95      n
U
c  M  [        U	U
UR                  UR                  S9n
[!        X9U
5        MM     U R%                  X#5        X4$ )Nr   r   r   r   r   r   )rS   rp   rD   getattrr-   r#   r+   r,   hasattrr   r   r   pack_latentsr    r   r   setattrrq   rV   r4   rO   rP   rW   ru   image_latent_input_nameimage_latent_tensorr+   r,   r   r   s              r   rZ   &QwenImageAdditionalInputsStep.__call__  sh   **51  44K&1&6&6#")+"O"* ==PRmRmnMF!,!3!3!=vK + 1 1 :UK;77+1(;66*/' #-"6"6"C"CDW"X #>20&1&G&G&11	# K:MN7 5<  88K$))J";;L#6%)&1&G&G&11	L K\: 9 	U0  r!   rq   rp   NNr\   r]   r^   r_   r`   ra   rd   r	   rs   rb   rc   r7   r   r   r@   r
   rI   r   r   rZ   rf   __classcell__rv   s   @r   rh   rh     s    &P J 8<;?!*-4 "&j!1D!8 > AS A A& 
T-%8 
 

 
Z( 
 
 +d;&7 + +Z2!#; 2!M 2!Vc 2! 2!r!   rh   c                      ^  \ rS rSrSrSr  SS\\   S-  S\\   S-  4U 4S jjjr\	S\
4S	 j5       r\	S\\   4S
 j5       r\	S\\   4S j5       r\	S\\   4S j5       rS\S\S\4S jrSrU =r$ )%QwenImageEditPlusAdditionalInputsStepi  a  
Input processing step for Edit Plus that:
    1. For image latent inputs (list): Collects heights/widths, patchifies each, concatenates, expands batch
    2. For additional batch inputs: Expands batch dimensions to match final batch size
    Height/width defaults to last image in the list.

  Configured inputs:
    - Image latent inputs: ['image_latents']

  This block should be placed after the encoder steps and the text input step.

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      image_height (`list`):
          The image heights calculated from the image latents dimension
      image_width (`list`):
          The image widths calculated from the image latents dimension
      height (`int`):
          if not provided, updated to image height
      width (`int`):
          if not provided, updated to image width
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified,
          concatenated, and batch-expanded)
zqwenimage-edit-plusNri   rj   c                   > Uc  [         R                  " S5      /nUc  / n[        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   [        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   Xl        X l        [        TU ]%  5         g rl   rn   rt   s       r   rs   .QwenImageEditPlusAdditionalInputsStep.__init__      
 &#-#6#6#G"H"*&(#-t44KDQdLeKfghh2!+z::$'abfgrbsat%uvv  3 1488OPTUlPmOnopp6!+z::$XY]^iYjXkl   7 %8!(?%r!   r   c                 l   SnSnU R                   (       d  U R                  (       a|  SnU R                   (       a,  USU R                    Vs/ s H  o3R                  PM     sn 3-  nU R                  (       a,  USU R                   Vs/ s H  o3R                  PM     sn 3-  nSnX-   U-   $ s  snf s  snf )Na  Input processing step for Edit Plus that:
  1. For image latent inputs (list): Collects heights/widths, patchifies each, concatenates, expands batch
  2. For additional batch inputs: Expands batch dimensions to match final batch size
  Height/width defaults to last image in the list.ry   rz   r{   r|   r}   r~   r   s        r   r7   1QwenImageEditPlusAdditionalInputsStep.description   s    A 	 $$(E(E2K((!<dNgNg=hNgffNg=h<ijj,,!@RVRoRoApRoQ&&RoAp@qrrn,/@@@ >iApr   c                 "    [        S[        SS9/$ r   r   r?   s    r   r   9QwenImageEditPlusAdditionalInputsStep.expected_components5  r   r!   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /nXR                  U R                  -   -  nU$ r   r   r   s     r   r@   ,QwenImageEditPlusAdditionalInputsStep.inputs;  sd      78-)(	
 	++d.K.KKKr!   c           	      @   [        S[        [           SS9[        S[        [           SS9/n[        U R                  5      S:  a<  UR                  [        S[        SS95        UR                  [        S	[        S
S95        U R                   H>  nUR                  [        UR                  UR                  UR                  S-   S95        M@     U R                   H>  nUR                  [        UR                  UR                  UR                  S-   S95        M@     U$ )Nr   z=The image heights calculated from the image latents dimensionrC   r   z<The image widths calculated from the image latents dimensionr   r+   r   r,   r   z/ (patchified, concatenated, and batch-expanded)r   )
r
   rd   rH   r   rp   r   rD   rE   r7   rq   r   s      r   rI   :QwenImageEditPlusAdditionalInputsStep.intermediate_outputsI  s    #s)[
 "s)Z
 t(()A-NNSFpq NNCEno
  44KNN$)))33 + 7 7:k k 5  88KNN$)))33 + 7 7:M M 9 r!   rO   rP   c           	         U R                  U5      nU R                   GHF  nUR                  n[        X55      nUc  M   [	        U[
        5      nU(       d  U/n/ n/ n	/ n
[        U5       H  u  p[        XR                  5      u  pUR                  U5        U	R                  U5        UR                  R                  U5      n[        U SU S3UUR                  UR                  S9nU
R                  U5        M     [        R                   " U
SS9n
Xl        Xl        UR&                  =(       d    US   Ul        UR(                  =(       d    U	S   Ul        [+        X5U
5        GMI     U R,                   HK  nUR                  n[        X?5      nUc  M  [        UUUR                  UR                  S9n[+        X?U5        MM     U R/                  X#5        X4$ )N[]r   r   r   rR   )rS   rp   rD   r   r   rd   	enumerater-   r#   r   r   r   r    r   r   r   catr   r   r+   r,   r   rq   rV   )r4   rO   rP   rW   ru   r   r   is_listimage_heightsimage_widthspacked_image_latent_tensorsiimg_latent_tensorr+   r,   r   r   s                    r   rZ   .QwenImageEditPlusAdditionalInputsStep.__call__w  s   **51  44K&1&6&6#")+"O"* !4d;G':&;#ML*,'(12E(F$ @ARToTo p$$V,##E* %/$8$8$E$EFW$X! %@"9!:!A3a@!2*5*K*K*55	%! ,223DE! )G& +0))4OUV*W' (5$&2# "-!3!3!H}R7HK + 1 1 E\"5EKK:UVW 5\  88K$))J";;L#6%)&1&G&G&11	L K\: 9 	U0  r!   r   r   r   r   s   @r   r   r     s    'R 'J 8<;?!*-4 "&j!1D!8 < AS A A( 
T-%8 
 

 Z(   +d;&7 + +ZB!#; B!M B!Vc B! B!r!   r   c                      ^  \ rS rSrSrSr  SS\\   S-  S\\   S-  4U 4S jjjr\	S\
4S	 j5       r\	S\\   4S
 j5       r\	S\\   4S j5       r\	S\\   4S j5       rS\S\S\4S jrSrU =r$ )$QwenImageLayeredAdditionalInputsStepi  a  
Input processing step for Layered that:
    1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch
       size
    2. For additional batch inputs: Expands batch dimensions to match final batch size

  Configured inputs:
    - Image latent inputs: ['image_latents']

  This block should be placed after the encoder steps and the text input step.

  Components:
      pachifier (`QwenImageLayeredPachifier`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      image_height (`int`):
          The image height calculated from the image latents dimension
      image_width (`int`):
          The image width calculated from the image latents dimension
      height (`int`):
          if not provided, updated to image height
      width (`int`):
          if not provided, updated to image width
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
          with layered pachifier and batch-expanded)
zqwenimage-layeredNri   rj   c                   > Uc  [         R                  " S5      /nUc  / n[        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   [        U[        5      (       d  [	        S[        U5       35      eU H/  n[        U[         5      (       a  M  [	        S[        U5       35      e   Xl        X l        [        TU ]%  5         g rl   rn   rt   s       r   rs   -QwenImageLayeredAdditionalInputsStep.__init__  r   r!   r   c                 l   SnSnU R                   (       d  U R                  (       a|  SnU R                   (       a,  USU R                    Vs/ s H  o3R                  PM     sn 3-  nU R                  (       a,  USU R                   Vs/ s H  o3R                  PM     sn 3-  nSnX-   U-   $ s  snf s  snf )NzInput processing step for Layered that:
  1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch size
  2. For additional batch inputs: Expands batch dimensions to match final batch sizery   rz   r{   r|   r}   r~   r   s        r   r7   0QwenImageLayeredAdditionalInputsStep.description  r   r   c                 "    [        S[        SS9/$ r   )r   r   r?   s    r   r   8QwenImageLayeredAdditionalInputsStep.expected_components  s     +'@Zgh
 	
r!   c                     [         R                  " S5      [         R                  " S5      /nXR                  U R                  -   -  nU$ )Nr   r   r   r   s     r   r@   +QwenImageLayeredAdditionalInputsStep.inputs   sH      78-
 	++d.K.KKKr!   c           	      $   [        S[        SS9[        S[        SS9/n[        U R                  5      S:  a<  UR	                  [        S[        SS95        UR	                  [        S	[        S
S95        U R                   H>  nUR	                  [        UR
                  UR                  UR                  S-   S95        M@     U R                   H>  nUR	                  [        UR
                  UR                  UR                  S-   S95        M@     U$ )Nr   r   rC   r   r   r   r+   r   r,   r   z7 (patchified with layered pachifier and batch-expanded)r   r   r   s      r   rI   9QwenImageLayeredAdditionalInputsStep.intermediate_outputs,  s    #Z
 "Y
 t(()A-NNSFpq NNCEno
  44KNN$)))33 + 7 7:s s 5  88KNN$)))33 + 7 7:M M 9 r!   rO   rP   c                    U R                  U5      nU R                   H  nUR                  n[        X55      nUc  M  UR                  S   UR
                  -  nUR                  S   UR
                  -  nXsl        Xl        [        US5      (       d  Xsl	        [        US5      (       d  Xl
        UR                  R                  U5      n[        UUUR                  UR                  S9n[!        X5U5        M     U R"                   HK  nUR                  n	[        X95      n
U
c  M  [        U	U
UR                  UR                  S9n
[!        X9U
5        MM     U R%                  X#5        X4$ )Nr   r%   r   r   r   )rS   rp   rD   r   r   r#   r+   r,   r   r   r   r   r   r    r   r   r   rq   rV   r   s              r   rZ   -QwenImageLayeredAdditionalInputsStep.__call__Y  sr   **51  44K&1&6&6#")+"O"* )..q1J4O4OOF'--a0:3N3NNE!' %;77+1(;66*/' #-"6"6"C"CDW"X #>20&1&G&G&11	# K:MN; 5@  88K$))J";;L#6%)&1&G&G&11	L K\: 9 	U0  r!   r   r   r   r   s   @r   r   r     s    #J %J 8<;?!*-4 "&j!1D!8 < AS A A& 
T-%8 
 

 	Z( 	 	 *d;&7 * *X4!#; 4!M 4!Vc 4! 4!r!   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\R                  " 5       S\S	\S\4S
 j5       rSrg)QwenImageControlNetInputsStepi  a!  
prepare the `control_image_latents` for controlnet. Insert after all the other inputs steps.

  Inputs:
      control_image_latents (`Tensor`):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.

  Outputs:
      control_image_latents (`Tensor`):
          The control image latents (patchified and batch-expanded).
      height (`int`):
          if not provided, updated to control image height
      width (`int`):
          if not provided, updated to control image width
r1   r   c                     g)Nz\prepare the `control_image_latents` for controlnet. Insert after all the other inputs steps.r3   r?   s    r   r7   )QwenImageControlNetInputsStep.description  s    mr!   c                     [        SS[        R                  SS9[         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )	Ncontrol_image_latentsTzlThe control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.)rD   requiredrE   r7   r   r   r+   r,   )r	   r   r   r>   r?   s    r   r@   $QwenImageControlNetInputsStep.inputs  sd     ,,, K	 - 78)(
 	
r!   c                 n    [        S[        R                  SS9[        S[        SS9[        S[        SS9/$ )Nr   z:The control image latents (patchified and batch-expanded).rC   r+   z0if not provided, updated to control image heightr,   z/if not provided, updated to control image width)r
   r   r   rH   r?   s    r   rI   2QwenImageControlNetInputsStep.intermediate_outputs  s?     ,,,X
 XBtuWArs
 	
r!   rO   rP   c                    U R                  U5      n[        UR                  [        5      (       a  / n[	        UR
                  5       H  u  pV[        XaR                  5      u  pxUR                  =(       d    UUl        UR                  =(       d    UUl	        UR                  R                  U5      n[        SU S3UUR                  UR                  S9nUR                  U5        M     XCl        O[        UR
                  UR                  5      u  pxUR                  =(       d    UUl        UR                  =(       d    UUl	        UR                  R                  UR
                  5      Ul        [        SUR
                  UR                  UR                  S9Ul        UR
                  Ul        U R!                  X#5        X4$ )Nzcontrol_image_latents[r   r   r   )rS   r   
controlnetr   r   r   r-   r#   r+   r,   r   r   r    r   r   r   rV   )	r4   rO   rP   rW   r   r   control_image_latents_r+   r,   s	            r   rZ   &QwenImageControlNetInputsStep.__call__  s   **51j++-JKK$&!-6{7X7X-Y) @AWYtYt u%0%7%7%A6"$/$5$5$>! *4)=)=)J)JKa)b& *E!7s!<!7*5*K*K*55	*& &,,-CD# .Z& 1F- =11:3N3NMF "-!3!3!=vK + 1 1 :UK 1;0D0D0Q0QR]RsRs0tK- 1L2(>>&1&G&G&11	1K- 1<0Q0QK-U0  r!   r3   N)r\   r]   r^   r_   r`   ra   rb   rc   r7   rd   r	   r@   r
   rI   r   no_gradr   r   rZ   rf   r3   r!   r   r   r     s    4 JnS n n 
Z( 
 
 	
d;&7 	
 	
 ]]_2!#; 2!M 2!Vc 2! 2!r!   r   )r   )r   modelsr   modular_pipeliner   r   modular_pipeline_utilsr   r	   r
   r   r   r   rc   r   rH   r    tupler-   r/   rh   r   r   r   r3   r!   r   <module>r      s      3 C K K e e "#	55,,5 5 	5
 \\5pell c V[\_ad\dVe @M!3 M!bQ!$9 Q!jc!,A c!TM!+@ M!bo!$9 o!r!   