
    
3j                        S r SSKrSSKrSSKJrJrJr  SSKJr  SSK	J
r
  SSKJrJrJrJr  SSKJrJrJr  SS	KJr  SS
KJr  SSKJr  SSKJrJr  SSKJrJrJ r   SSKJ!r!  SSK"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+  \RX                  " \-5      r.S\R^                  S\R^                  4S jr0S\*\+SS4S\1\2\1   -  S\1S\3S\3S\Rh                  S-  4
S jjr5SS\&\'S4S\1\2\1   -  S\R^                  S-  S\1S\3S\Rh                  S-  4
S jjr6SS\$\#\%S4S\1\2\1   -  S\R^                  \2\Rn                  Rn                  \Rn                  Rn                  4   -  S-  S\1S\1S\3S\Rh                  S-  4S jjr8 SFS \R^                  S!\Rr                  S-  S"\14S# jjr:  SGS\R^                  S$\S!\Rr                  S\Rh                  S%\Rv                  S&\3S"\14S' jjr< " S( S)\5      r= " S* S+\5      r> " S, S-\5      r? " S. S/\5      r@ " S0 S1\5      rA " S2 S3\5      rB " S4 S5\5      rC " S6 S7\5      rD " S8 S9\5      rE " S: S;\5      rF " S< S=\5      rG " S> S?\5      rH " S@ SA\5      rI " SB SC\5      rJ " SD SE\5      rKg)Hz6
Text and VAE encoder blocks for QwenImage pipelines.
    N)"Qwen2_5_VLForConditionalGenerationQwen2TokenizerQwen2VLProcessor   )
FrozenDict)ClassifierFreeGuidance)InpaintProcessorVaeImageProcessoris_valid_imageis_valid_image_imagelist)AutoencoderKLQwenImageQwenImageControlNetModelQwenImageMultiControlNetModel)calculate_dimensions)logging)unwrap_module   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )QwenImageModularPipeline)	 QWENIMAGE_EDIT_PLUS_IMG_TEMPLATE#QWENIMAGE_EDIT_PLUS_PROMPT_TEMPLATE-QWENIMAGE_EDIT_PLUS_PROMPT_TEMPLATE_START_IDXQWENIMAGE_EDIT_PROMPT_TEMPLATE(QWENIMAGE_EDIT_PROMPT_TEMPLATE_START_IDX#QWENIMAGE_LAYERED_CAPTION_PROMPT_CN#QWENIMAGE_LAYERED_CAPTION_PROMPT_ENQWENIMAGE_PROMPT_TEMPLATE#QWENIMAGE_PROMPT_TEMPLATE_START_IDXhidden_statesmaskc                     UR                  5       nUR                  SS9nX   n[        R                  " XCR	                  5       SS9nU$ )Nr   dimr   )boolsumtorchsplittolist)r$   r%   	bool_maskvalid_lengthsselectedsplit_results         h/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/qwenimage/encoders.py_extract_masked_hiddenr3   1   sD    		IMMaM(M'H;;x)=)=)?QGL       promptprompt_template_encode prompt_template_encode_start_idxtokenizer_max_lengthdevicec                 D   [        U[        5      (       a  U/OUnUnUnU V	s/ s H  oR                  U	5      PM     n
n	U" XU-   SSSS9R                  U5      nU " UR                  UR
                  SS9nUR                  S   n[        XR
                  5      nU V	s/ s H  oUS  PM	     nn	U V	s/ s H@  n	[        R                  " U	R                  S5      [        R                  U	R                  S9PMB     nn	[        U V	s/ s H  oR                  S5      PM     sn	5      n[        R                  " U Vs/ s HL  n[        R                  " UUR!                  UUR                  S5      -
  UR                  S5      5      /5      PMN     sn5      n[        R                  " U Vs/ s H<  n[        R                  " UUR!                  UUR                  S5      -
  5      /5      PM>     sn5      nUR                  US	9nUU4$ s  sn	f s  sn	f s  sn	f s  sn	f s  snf s  snf )
NTpt)
max_lengthpadding
truncationreturn_tensors)	input_idsattention_maskoutput_hidden_statesr   dtyper:   r   r:   )
isinstancestrformattorA   rB   r$   r3   r+   onessizelongr:   maxstackcat	new_zeros)text_encoder	tokenizerr6   r7   r8   r9   r:   templatedrop_idxetxt
txt_tokensencoder_hidden_statesr$   split_hidden_statesattn_mask_listmax_seq_lenuprompt_embedsencoder_attention_masks                       r2   get_qwen_prompt_embedsra   9   s    $FC00fXfF%H/H'-
.v!??1vC
.x7RVgkbj  )&&!00!
 *77;M0@Y@YZ1DE1DAXY<1DEXklXkSTejj%**QXXNXkNl*=>*=Qvvay*=>?KKKRefReQAq{{;#:AFF1IFG	HRefM #[[GUV~!Aq{{;#:;<	=~V "$$F$3M0001 / Fl>f 	Ws&   HH	2AHH9AH'AHimagec                 T   [        U[        5      (       a  U/OUnUnUnU V	s/ s H  oR                  U	5      PM     n
n	U" U
USSS9R                  U5      nU " US   US   UR	                  S5      UR	                  S5      SS9nUR
                  S	   n[        XS   5      nU V	s/ s H  oUS  PM	     nn	U V	s/ s H@  n	[        R                  " U	R                  S
5      [        R                  U	R                  S9PMB     nn	[        U V	s/ s H  oR                  S
5      PM     sn	5      n[        R                  " U Vs/ s HL  n[        R                  " UUR                  UUR                  S
5      -
  UR                  S5      5      /5      PMN     sn5      n[        R                  " U Vs/ s H<  n[        R                  " UUR                  UUR                  S
5      -
  5      /5      PM>     sn5      nUR                  US9nUU4$ s  sn	f s  sn	f s  sn	f s  sn	f s  snf s  snf )NTr<   textimagesr>   r@   rA   rB   pixel_valuesimage_grid_thwrA   rB   rg   rh   rC   rD   r   rE   r   rG   )rH   rI   rJ   rK   getr$   r3   r+   rL   rM   rN   r:   rO   rP   rQ   rR   )rS   	processorr6   rb   r7   r8   r:   rU   rV   rW   rX   model_inputsoutputsr$   r[   r\   r]   r^   r_   r`   s                       r2   get_qwen_prompt_embeds_editrn   a   s    $FC00fXfF%H/H'-
.v!??1vC
.	
 	bj  {+#$45!%%n5#''(89!G ))"-M0M]@^_1DE1DAXY<1DEXklXkSTejj%**QXXNXkNl*=>*=Qvvay*=>?KKKRefReQAq{{;#:AFF1IFG	HRefM #[[GUV~!Aq{{;#:;<	=~V "$$F$3M000? /& Fl>f 	Ws&   H&H:AHHAH /AH%img_template_encodec                    [        U[        5      (       a  U/OUn[        U[        5      (       a-  Sn[        U5       H  u  pXR	                  U	S-   5      -  nM     OUb  UR	                  S5      nOSnUnUnU Vs/ s H  oR	                  X-   5      PM     nnU" UUSSS9R                  U5      nU " US   US   UR                  S5      UR                  S	5      SS
9nUR                  S   n[        UUS   5      nU Vs/ s H  oUS  PM	     nnU Vs/ s H@  n[        R                  " UR                  S5      [        R                  UR                  S9PMB     nn[        U Vs/ s H  oR                  S5      PM     sn5      n[        R                  " U Vs/ s HL  n[        R                   " UUR#                  UUR                  S5      -
  UR                  S5      5      /5      PMN     sn5      n[        R                  " U Vs/ s H<  n[        R                   " UUR#                  UUR                  S5      -
  5      /5      PM>     sn5      nUR                  US9nUU4$ s  snf s  snf s  snf s  snf s  snf s  snf )N r   Tr<   rd   rA   rB   rg   rh   ri   rD   r   rE   rG   )rH   rI   list	enumeraterJ   rK   rj   r$   r3   r+   rL   rM   rN   r:   rO   rP   rQ   rR   )rS   rk   r6   rb   r7   ro   r8   r:   base_img_promptiimgrU   rV   rW   rX   rl   rm   r$   r[   r\   r]   r^   r_   r`   s                           r2    get_qwen_prompt_embeds_edit_plusrw      s`    $FC00fXfF%&FA99!a%@@O '		-44Q7%H/H9?
@A???./C
@	
 	bj  {+#$45!%%n5#''(89!G ))"-M0M]@^_1DE1DAXY<1DEXklXkSTejj%**QXXNXkNl*=>*=Qvvay*=>?KKKRefReQAq{{;#:AFF1IFG	HRefM #[[GUV~!Aq{{;#:;<	=~V "$$F$3M000; A$ Fl>f 	Ws'   <I(I-AI2(I7AI<AJencoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr|   r}   moder   AttributeError)rx   ry   rz   s      r2   retrieve_latentsr      s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSSr4   vaerF   latent_channelsc                    [        U [        R                  5      (       d  [        S[	        U 5       S35      eU R                  5       S:X  a  U R                  S5      n O1U R                  5       S:w  a  [        SU R                  5        S35      eU R                  X4S9n [        U[        5      (       a]  [        U R                  S   5       Vs/ s H$  n[        UR                  XUS	-    5      X'   US
9PM&     nn[        R                  " USS9nO[        UR                  U 5      X&S
9n[        R                  " UR                  R                   5      R#                  S	US	S	S	5      R                  UR$                  UR&                  5      n	[        R                  " UR                  R(                  5      R#                  S	US	S	S	5      R                  UR$                  UR&                  5      n
X-
  U
-  nU$ s  snf )Nz#Expected image to be a tensor, got .   r      z Expected image dims 4 or 5, got )r:   rF   r   r   )ry   rz   r'   )rH   r+   Tensor
ValueErrortyper(   	unsqueezerK   rr   rangeshaper   encoderQ   tensorconfiglatents_meanviewr:   rF   latents_std)rb   r   ry   r:   rF   r   rz   ru   image_latentsr   r   s              r2   encode_vae_imager      s    eU\\**>tE{m1MNN yy{a"		;EIIK=JKKHHFH0E)T"" 5;;q>*
* SZZ!a%(89Y\_jk* 	 
 		-Q7(E):iiSZZ,,-	a!Q	*	M  -"5"5	6  	SZZ++,	a!Q	*	M  -"5"5	6 
 #1[@M'
s   +G4c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\4S j5       rSrg)QwenImageEditResizeStepi  ac  
Image Resize step that resize the image to target area while maintaining the aspect ratio.

  Components:
      image_resize_processor (`VaeImageProcessor`)

  Inputs:
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.

  Outputs:
      resized_image (`list`):
          The resized images
qwenimage-editreturnc                     g)NzZImage Resize step that resize the image to target area while maintaining the aspect ratio. selfs    r2   description#QwenImageEditResizeStep.description*  s    kr4   c                 :    [        S[        [        SS05      SS9/$ Nimage_resize_processorvae_scale_factor   from_configr   default_creation_methodr   r
   r   r   s    r2   expected_components+QwenImageEditResizeStep.expected_components.  -     (!!#5r":;(5	
 	
r4   c                 0    [         R                  " S5      /$ Nrb   r   rU   r   s    r2   inputsQwenImageEditResizeStep.inputs9  s    ##G,--r4   c                 X    [        S[        [        R                  R                     SS9/$ Nresized_imagezThe resized imagesname	type_hintr   r   rr   PILImager   s    r2   intermediate_outputs,QwenImageEditResizeStep.intermediate_outputs=  ,     $syy/0
 	
r4   
componentsstatec           	         U R                  U5      nUR                  n[        U5      (       d  [        S[	        U5       35      e[        U5      (       a  U/nUS   R                  u  pV[        SXV-  5      u  pxn	U V
s/ s H  n
UR                  R                  XUS9PM     nn
Xl
        U R                  X#5        X4$ s  sn
f )N/Images must be image or list of images but are r      heightwidth)get_block_staterb   r   r   r   r   rM   r   r   resizer   set_block_state)r   r   r   block_staterf   image_widthimage_heightcalculated_widthcalculated_height_rb   resized_imagess               r2   __call__ QwenImageEditResizeStep.__call__G  s    **51""'//NtTZ|n]^^&!!XF$*1INN!1EkS^Sm1n.Q  
 --44U\l4m 	 

 %3!U0  
s   >$B>r   N__name__
__module____qualname____firstlineno____doc__
model_namepropertyrI   r   rr   r   r   r   r   r   r   r+   no_gradr   r   r   __static_attributes__r   r4   r2   r   r     s     "JlS l l 
T-%8 
 
 .Z( . . 
d;&7 
 
 ]]_!#; !M ! !r4   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	\4S
 j5       r\R(                  " 5       S\S\4S j5       rSrg)QwenImageLayeredResizeStepia  a  
Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while
maintaining the aspect ratio.

  Components:
      image_resize_processor (`VaeImageProcessor`)

  Inputs:
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.
      resolution (`int`, *optional*, defaults to 640):
          The target area to resize the image to, can be 1024 or 640

  Outputs:
      resized_image (`list`):
          The resized images
qwenimage-layeredr   c                     g)NzImage Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio.r   r   s    r2   r   &QwenImageLayeredResizeStep.descriptionv  s     ^r4   c                 :    [        S[        [        SS05      SS9/$ r   r   r   s    r2   r   .QwenImageLayeredResizeStep.expected_componentsz  r   r4   c                 N    [         R                  " S5      [        SS[        SS9/$ )Nrb   
resolution  z:The target area to resize the image to, can be 1024 or 640r   defaultr   r   )r   rU   intr   s    r2   r   !QwenImageLayeredResizeStep.inputs  s0     (!X	
 	
r4   c                 X    [        S[        [        R                  R                     SS9/$ r   r   r   s    r2   r   /QwenImageLayeredResizeStep.intermediate_outputs  r   r4   r   c                 ,    U S;  a  [        SU  35      eg )N)r5   r   z&Resolution must be 1024 or 640 but is r   r   s    r2   check_inputs'QwenImageLayeredResizeStep.check_inputs  s"    [(Ej\RSS )r4   r   r   c           	         U R                  U5      nU R                  UR                  S9  UR                  n[	        U5      (       d  [        S[        U5       35      e[        U5      (       a  U/nUS   R                  u  pVUR                  UR                  -  n[        XuU-  5      u  pn
U Vs/ s H  nUR                  R                  XUS9PM     nnXl        U R                  X#5        X4$ s  snf )Nr   r   r   r   )r   r   r   rb   r   r   r   r   rM   r   r   r   r   r   )r   r   r   r   rf   r   r   target_arear   r   r   rb   r   s                r2   r   #QwenImageLayeredResizeStep.__call__  s   **51[%;%;<""'//NtTZ|n]^^&!!XF$*1INN!!,,{/E/EE1EkamSm1n.Q  
 --44U\l4m 	 

 %3!U0  
s   0$C0r   N)r   r   r   r   r   r   r   rI   r   rr   r   r   r   r   r   r   staticmethodr   r   r+   r   r   r   r   r   r   r4   r2   r   r   a  s    $ %J^S ^ ^ 
T-%8 
 
 	
Z( 	
 	
 
d;&7 
 
 T T T ]]_!#; !M ! !r4   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\4S j5       rSrg)QwenImageEditPlusResizeStepi  a  
Resize images for QwenImage Edit Plus pipeline.
  Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text
  encoding. Each image is resized independently based on its own aspect ratio.

  Components:
      image_resize_processor (`VaeImageProcessor`)

  Inputs:
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.

  Outputs:
      resized_image (`list`):
          Images resized to 1024x1024 target area for VAE encoding
      resized_cond_image (`list`):
          Images resized to 384x384 target area for VL text encoding
qwenimage-edit-plusr   c                      g)NzResize images for QwenImage Edit Plus pipeline.
Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text encoding.
Each image is resized independently based on its own aspect ratio.r   r   s    r2   r   'QwenImageEditPlusResizeStep.description  s    Q	
r4   c                 :    [        S[        [        SS05      SS9/$ r   r   r   s    r2   r   /QwenImageEditPlusResizeStep.expected_components  r   r4   c                 0    [         R                  " S5      /$ r   r   r   s    r2   r   "QwenImageEditPlusResizeStep.inputs  s     ##G,--r4   c                     [        S[        [        R                  R                     SS9[        S[        [        R                  R                     SS9/$ )Nr   z8Images resized to 1024x1024 target area for VAE encodingr   resized_cond_imagez:Images resized to 384x384 target area for VL text encodingr   r   s    r2   r   0QwenImageEditPlusResizeStep.intermediate_outputs  sL     $syy/V
 )syy/X
 	
r4   r   r   c           	         U R                  U5      nUR                  n[        U5      (       d  [        S[	        U5       35      e[        U5      (       a  U/n/ n/ nU H  nUR                  u  p[        SX-  5      u  pnUR                  UR                  R                  X{U
S95        [        SX-  5      u  pnUR                  UR                  R                  X~US95        M     XSl        Xcl        U R                  X#5        X4$ )Nr   r   r   i @ )r   rb   r   r   r   r   rM   r   appendr   r   r   r   r   )r   r   r   r   rf   r   resized_cond_imagesrb   r   r   	vae_width
vae_heightr   vl_width	vl_heights                  r2   r   $QwenImageEditPlusResizeStep.__call__  s   **51""'//NtTZ|n]^^&!!XF  E(-

%K (<KIc'd$I1!!*"C"C"J"J5kt"J"uv &:)[E_%`"H&&1188X`8a  %3!)<&U0  r4   r   Nr   r   r4   r2   r   r     s    & 'J
S 
 
 
T-%8 
 
 .Z( . . 
d;&7 
 
 ]]_!#; !M ! !r4   r   c                      ^  \ rS rSrSrSrU 4S jr\S\4S j5       r	\S\
\   4S j5       r\S\
\   4S j5       r\S\
\   4S	 j5       r\R$                  " 5       S
\S\S\4S j5       rSrU =r$ )"QwenImageLayeredGetImagePromptStepi#  a%  
Auto-caption step that generates a text prompt from the input image if none is provided.
  Uses the VL model (text_encoder) to generate a description of the image. If prompt is already provided, this step
  passes through unchanged.

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`)

  Inputs:
      prompt (`str`, *optional*):
          The prompt or prompts to guide image generation.
      resized_image (`Image`):
          The image to generate caption from, should be resized use the resize step
      use_en_prompt (`bool`, *optional*, defaults to False):
          Whether to use English prompt template

  Outputs:
      prompt (`str`):
          The prompt or prompts to guide image generation. If not provided, updated using image caption
r   c                 N   > [         U l        [        U l        [        TU ]  5         g N)r!   image_caption_prompt_enr    image_caption_prompt_cnsuper__init__r   	__class__s    r2   r  +QwenImageLayeredGetImagePromptStep.__init__;  s    'J$'J$r4   r   c                      g)NzAuto-caption step that generates a text prompt from the input image if none is provided.
Uses the VL model (text_encoder) to generate a description of the image.
If prompt is already provided, this step passes through unchanged.r   r   s    r2   r   .QwenImageLayeredGetImagePromptStep.description@  s    Q	
r4   c                 B    [        S[        5      [        S[        5      /$ )NrS   rk   )r   r   r   r   s    r2   r   6QwenImageLayeredGetImagePromptStep.expected_componentsH  s$     .*LM+'78
 	
r4   c           	          [         R                  " SSS9[        SS[        R                  R                  SS9[        SS[        S	S
9/$ )Nr6   F)requiredr   TzIThe image to generate caption from, should be resized use the resize stepr   r  r   r   use_en_promptz&Whether to use English prompt templater   )r   rU   r   r   r)   r   s    r2   r   )QwenImageLayeredGetImagePromptStep.inputsO  sU     5 $))//g	 $D	
 	
r4   c                 "    [        S[        SS9/$ )Nr6   z]The prompt or prompts to guide image generation. If not provided, updated using image captionr   )r   rI   r   s    r2   r   7QwenImageLayeredGetImagePromptStep.intermediate_outputsc  s     {
 	
r4   r   r   c                    U R                  U5      nUR                  nUR                  b   UR                  S:X  d  UR                  S:X  a  UR                  (       a  U R                  nOU R
                  nUR                  UUR                  SSS9R                  U5      nUR                  R                  " S0 UDSS0D6n[        UR                  U5       VV	s/ s H  u  pU	[        U5      S  PM     n
nn	UR                  R                  U
SSS	9S
   nUR                  5       Ul        U R!                  X#5        X4$ s  sn	nf )Nrq    Tr<   rd   max_new_tokensi   F)skip_special_tokensclean_up_tokenization_spacesr   r   )r   _execution_devicer6   r  r  r  rk   r   rK   rS   generateziprA   lenbatch_decodestripr   )r   r   r   r   r:   caption_promptrl   generated_idsin_idsout_idsgenerated_ids_trimmedoutput_texts               r2   r   +QwenImageLayeredGetImagePromptStep.__call__m  sO   **51-- %););r)A[EWEW[^E^((!%!=!=!%!=!=%//#"00#	 0 
 bj  '33<<`|`\_`M>A,BXBXZg>h%>h?6F&>h " % %..;;%4^c < K "-!2!2!4KU0  %s   D=)r  r  )r   r   r   r   r   r   r  r   rI   r   rr   r   r   r   r   r   r   r+   r   r   r   r   r   __classcell__r  s   @r2   r  r  #  s    * %J
 
S 
 
 
T-%8 
 
 
Z( 
 
& 
d;&7 
 
 ]]_!#; !M !Vc ! !r4   r  c                      ^  \ rS rSrSrSrU 4S jr\S\4S j5       r	\S\
\   4S j5       r\S\
\   4S j5       r\S\
\   4S	 j5       r\S
 5       r\R(                  " 5       S\S\4S j5       rSrU =r$ )QwenImageTextEncoderStepi  a  
Text Encoder step that generates text embeddings to guide the image generation.

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
      The tokenizer to use guider (`ClassifierFreeGuidance`)

  Inputs:
      prompt (`str`):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      max_sequence_length (`int`, *optional*, defaults to 1024):
          Maximum sequence length for prompt encoding.

  Outputs:
      prompt_embeds (`Tensor`):
          The prompt embeddings.
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask.
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings.
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask.
	qwenimagec                 \   > [         U l        [        U l        SU l        [
        TU ]  5         g )Nr5   )r"   r7   r#   r8   r9   r  r  r  s    r2   r  !QwenImageTextEncoderStep.__init__  s&    &?#0S-$(!r4   r   c                     g)NzOText Encoder step that generates text embeddings to guide the image generation.r   r   s    r2   r   $QwenImageTextEncoderStep.description  s    `r4   c           
      r    [        S[        SS9[        S[        SS9[        S[        [	        SS05      S	S
9/$ )NrS   zThe text encoder to use)r   rT   zThe tokenizer to useguiderguidance_scale      @r   r   )r   r   r   r   r   r   s    r2   r   ,QwenImageTextEncoderStep.expected_components  sH     .*LZst+~CYZ&!#3S"9:(5		
 		
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " SSS9/$ )Nr6   negative_promptmax_sequence_lengthr5   )r   r   r   s    r2   r   QwenImageTextEncoderStep.inputs  s<     ) 12 5tD
 	
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ Nr_   prompt_embeds_masknegative_prompt_embedsnegative_prompt_embeds_maskr   rU   r   s    r2   r   -QwenImageTextEncoderStep.intermediate_outputs  I       1  !56  !9:  !>?	
 	
r4   c                 >   [        U [        5      (       d,  [        U [        5      (       d  [        S[	        U 5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[	        U5       35      eUb  US:  a  [        SU 35      eg g )N2`prompt` has to be of type `str` or `list` but is ;`negative_prompt` has to be of type `str` or `list` but is r5   z9`max_sequence_length` cannot be greater than 1024 but is rH   rI   rr   r   r   )r6   rA  rB  s      r2   r   %QwenImageTextEncoderStep.check_inputs  s    &#&&z&$/G/GQRVW]R^Q_`aa '4455Z[_`o[pZqrss*/BT/IXYlXmnoo 0J*r4   r   r   c           
         U R                  U5      nUR                  nU R                  UR                  UR                  UR
                  5        [        UR                  UR                  UR                  U R                  U R                  U R                  US9u  Ul        Ul        UR                  S S 2S UR
                  24   Ul        UR                  S S 2S UR
                  24   Ul        S Ul        S Ul        UR                   (       a  UR                  =(       d    Sn[        UR                  UR                  UU R                  U R                  U R                  US9u  Ul        Ul        UR                  S S 2S UR
                  24   Ul        UR                  S S 2S UR
                  24   Ul        U R#                  X#5        X4$ )N)r6   r7   r8   r9   r:   rq   )r   r%  r   r6   rA  rB  ra   rS   rT   r7   r8   r9   r_   rF  rG  rH  requires_unconditional_embedsr   r   r   r   r   r:   rA  s         r2   r   !QwenImageTextEncoderStep.__call__  s   **51--+,,k.I.I;KjKjkDZ##  %%#'#>#>-1-R-R!%!:!:E
A!;#A %0$=$=aAb;CbCbAb>b$c!)4)G)GKl[MlMlKlHl)m&-1*26/33)99?ROZp''$$&'+'B'B151V1V%)%>%>[WK.0W 2=1S1S4[44442K. 7B6]6]4[44447K3 	U0  r4   )r7   r8   r9   r   r   r   r   r   r   r  r   rI   r   rr   r   r   r   r   r   r   r   r   r+   r   r   r   r   r   r2  r3  s   @r2   r5  r5    s    4 J aS a a 

T-%8 

 

 
Z( 
 
 
d;&7 
 
 p p ]]_(!#; (!M (! (!r4   r5  c                      ^  \ rS rSrSrSrU 4S jr\S\4S j5       r	\S\
\   4S j5       r\S\
\   4S j5       r\S\
\   4S	 j5       r\S
 5       r\R(                  " 5       S\S\4S j5       rSrU =r$ )QwenImageEditTextEncoderStepi  a  
Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image
generation.

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
      (`ClassifierFreeGuidance`)

  Inputs:
      prompt (`str`):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      resized_image (`Image`):
          The image prompt to encode, should be resized using resize step

  Outputs:
      prompt_embeds (`Tensor`):
          The prompt embeddings.
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask.
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings.
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask.
r6  c                 N   > [         U l        [        U l        [        TU ]  5         g r  )r   r7   r   r8   r  r  r  s    r2   r  %QwenImageEditTextEncoderStep.__init__4  s    &D#0X-r4   r   c                     g)NzyText Encoder step that processes both prompt and image together to generate text embeddings for guiding image generation.r   r   s    r2   r   (QwenImageEditTextEncoderStep.description9  s     Kr4   c           
      v    [        S[        5      [        S[        5      [        S[        [	        SS05      SS9/$ NrS   rk   r<  r=  r>  r   r   r   r   r   r   r   r   s    r2   r   0QwenImageEditTextEncoderStep.expected_components=  C     .*LM+'78&!#3S"9:(5		
 		
r4   c           	          [         R                  " S5      [         R                  " S5      [        SS[        R                  R                  SS9/$ )Nr6   rA  r   Tz?The image prompt to encode, should be resized using resize stepr  r   rU   r   r   r   s    r2   r   #QwenImageEditTextEncoderStep.inputsJ  sG     ) 12$))//]		
 		
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ rE  rI  r   s    r2   r   1QwenImageEditTextEncoderStep.intermediate_outputsW  rK  r4   c                    [        U [        5      (       d,  [        U [        5      (       d  [        S[	        U 5       35      eUbC  [        U[        5      (       d-  [        U[        5      (       d  [        S[	        U5       35      eg g g NrM  rN  rO  r6   rA  s     r2   r   )QwenImageEditTextEncoderStep.check_inputs`      &#&&z&$/G/GQRVW]R^Q_`aa '4455Z[_`o[pZqrss 6 5 (r4   r   r   c           
      d   U R                  U5      nU R                  UR                  UR                  5        UR                  n[        UR                  UR                  UR                  UR                  U R                  U R                  US9u  Ul        Ul        S Ul        S Ul        UR                  (       ac  UR                  =(       d    Sn[        UR                  UR                  UUR                  U R                  U R                  US9u  Ul        Ul        U R!                  X#5        X4$ )N)r6   rb   r7   r8   r:   r!  )r   r   r6   rA  r%  rn   rS   rk   r   r7   r8   r_   rF  rG  rH  rR  r   rS  s         r2   r   %QwenImageEditTextEncoderStep.__call__l  s   **51+,,k.I.IJ--D_##  %%++#'#>#>-1-R-RE
A!;#A .2*26/33)99@SOZu''$$&!//'+'B'B151V1V[WK.0W 	U0  r4   )r7   r8   rU  r3  s   @r2   rW  rW    s    6 J
 KS K K 

T-%8 

 

 

Z( 

 

 
d;&7 
 
 	t 	t ]]_ !#;  !M  !  !r4   rW  c                      ^  \ rS rSrSrSrU 4S jr\S\4S j5       r	\S\
\   4S j5       r\S\
\   4S j5       r\S\
\   4S	 j5       r\S
 5       r\R(                  " 5       S\S\4S j5       rSrU =r$ ) QwenImageEditPlusTextEncoderStepi  a  
Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text
embeddings for guiding image generation.

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
      (`ClassifierFreeGuidance`)

  Inputs:
      prompt (`str`):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      resized_cond_image (`Tensor`):
          The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using
          resize step

  Outputs:
      prompt_embeds (`Tensor`):
          The prompt embeddings.
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask.
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings.
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask.
r   c                 d   > [         U l        [        U l        [        U l        [        TU ]  5         g r  )r   r7   r   ro   r   r8   r  r  r  s    r2   r  )QwenImageEditPlusTextEncoderStep.__init__  s&    &I##C 0]-r4   r   c                      g)NzText Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text embeddings for guiding image generation.r   r   s    r2   r   ,QwenImageEditPlusTextEncoderStep.description  s    H	
r4   c           
      v    [        S[        5      [        S[        5      [        S[        [	        SS05      SS9/$ r]  r^  r   s    r2   r   4QwenImageEditPlusTextEncoderStep.expected_components  r`  r4   c           	          [         R                  " S5      [         R                  " S5      [        SS[        R                  SS9/$ )Nr6   rA  r   TzoThe image(s) to encode, can be a single image or list of images, should be resized to 384x384 using resize stepr  )r   rU   r+   r   r   s    r2   r   'QwenImageEditPlusTextEncoderStep.inputs  sF     ) 12),, N		
 		
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ rE  rI  r   s    r2   r   5QwenImageEditPlusTextEncoderStep.intermediate_outputs  rK  r4   c                    [        U [        5      (       d,  [        U [        5      (       d  [        S[	        U 5       35      eUbC  [        U[        5      (       d-  [        U[        5      (       d  [        S[	        U5       35      eg g g rg  rO  rh  s     r2   r   -QwenImageEditPlusTextEncoderStep.check_inputs  rj  r4   r   r   c                    U R                  U5      nU R                  UR                  UR                  5        UR                  n[        UR                  UR                  UR                  UR                  U R                  U R                  U R                  US9u  Ul        Ul        S Ul        S Ul        UR                   (       an  UR                  =(       d    Sn[        UR                  UR                  UUR                  U R                  U R                  U R                  US9u  Ul        Ul        U R#                  X#5        X4$ )N)r6   rb   r7   ro   r8   r:   r!  )r   r   r6   rA  r%  rw   rS   rk   r   r7   ro   r8   r_   rF  rG  rH  rR  r   rS  s         r2   r   )QwenImageEditPlusTextEncoderStep.__call__  s)   **51+,,k.I.IJ--Dd##  %%00#'#>#> $ 8 8-1-R-R	E
A!;#A .2*26/33)99@SO0++((*%88+/+F+F(,(@(@595Z5Z!	 XK.0W 	U0  r4   )ro   r7   r8   rU  r3  s   @r2   rn  rn    s    8 'J 
S 
 
 

T-%8 

 

 

Z( 

 

 
d;&7 
 
 	t 	t ]]_$!#; $!M $! $!r4   rn  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\4S j5       rSrg)&QwenImageInpaintProcessImagesInputStepi  a  
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be
resized to the given height and width.

  Components:
      image_mask_processor (`InpaintProcessor`)

  Inputs:
      mask_image (`Image`):
          Mask image for inpainting.
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      padding_mask_crop (`int`, *optional*):
          Padding for mask cropping in inpainting.

  Outputs:
      processed_image (`Tensor`):
          The processed image
      processed_mask_image (`Tensor`):
          The processed mask image
      mask_overlay_kwargs (`dict`):
          The kwargs for the postprocess step to apply the mask overlay
r6  r   c                     g)NzImage Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be resized to the given height and width.r   r   s    r2   r   2QwenImageInpaintProcessImagesInputStep.description9  s     er4   c                 :    [        S[        [        SS05      SS9/$ Nimage_mask_processorr   r   r   r   r   r	   r   r   s    r2   r   :QwenImageInpaintProcessImagesInputStep.expected_components=  -     & !#5r":;(5	
 	
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )N
mask_imagerb   r   r   padding_mask_cropr   r   s    r2   r   -QwenImageInpaintProcessImagesInputStep.inputsH  sU     -()( 34
 	
r4   c                     [        S[        R                  SS9[        S[        R                  SS9[        S[        SS9/$ Nprocessed_imageThe processed imager   processed_mask_imagezThe processed mask imagemask_overlay_kwargsz=The kwargs for the postprocess step to apply the mask overlayr   r+   r   dictr   s    r2   r   ;QwenImageInpaintProcessImagesInputStep.intermediate_outputsR  sM     &,,1
 +,,6
 *[
 	
r4   c                     U b  XS-  -  S:w  a  [        SUS-   SU  35      eUb   XS-  -  S:w  a  [        SUS-   SU 35      eg g Nr   r   zHeight must be divisible by z but is zWidth must be divisible by r   r   r   r   s      r2   r   3QwenImageInpaintProcessImagesInputStep.check_inputsf  {    &q,@"AQ"F;<Lq<P;QQYZ`YabccQ*>!?1!D:;Ka;O:PPXY^X_`aa "Er4   r   r   c                    U R                  U5      nU R                  UR                  UR                  UR                  S9  UR                  =(       d    UR
                  nUR                  =(       d    UR                  nUR                  R                  UR                  UR                  UUUR                  S9u  Ul        Ul        Ul        U R                  X#5        X4$ )Nr  rb   r%   r   r   r  )r   r   r   r   r   default_heightdefault_widthr  
preprocessrb   r  r  r  r  r  r   r   r   r   r   r   r   s         r2   r   /QwenImageInpaintProcessImagesInputStep.__call__n  s    **51%%[->->Q[QlQl 	 	
 ##@z'@'@!!=Z%=%= ++66!'' ++"-"?"? 7  	g#[%E{Gf 	U0  r4   r   Nr   r   r   r   r   r   r   rI   r   rr   r   r   r   r   r   r   r   r   r+   r   r   r   r   r   r   r4   r2   r~  r~    s    8 JeS e e 
T-%8 
 
 
Z( 
 
 
d;&7 
 
& b b ]]_!#; !M ! !r4   r~  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\4S j5       rSrg)*QwenImageEditInpaintProcessImagesInputStepi  a  
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be
resized first.

  Components:
      image_mask_processor (`InpaintProcessor`)

  Inputs:
      mask_image (`Image`):
          Mask image for inpainting.
      resized_image (`Image`):
          The resized image. should be generated using a resize step
      padding_mask_crop (`int`, *optional*):
          Padding for mask cropping in inpainting.

  Outputs:
      processed_image (`Tensor`):
          The processed image
      processed_mask_image (`Tensor`):
          The processed mask image
      mask_overlay_kwargs (`dict`):
          The kwargs for the postprocess step to apply the mask overlay
r   r   c                     g)Nz}Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be resized first.r   r   s    r2   r   6QwenImageEditInpaintProcessImagesInputStep.description  s     Or4   c                 :    [        S[        [        SS05      SS9/$ r  r  r   s    r2   r   >QwenImageEditInpaintProcessImagesInputStep.expected_components  r  r4   c                     [         R                  " S5      [        SS[        R                  R                  SS9[         R                  " S5      /$ )Nr  r   T:The resized image. should be generated using a resize stepr  r  rb  r   s    r2   r   1QwenImageEditInpaintProcessImagesInputStep.inputs  sI     -$))//X	  34	
 		
r4   c                     [        S[        R                  SS9[        S[        R                  SS9[        S[        SS9/$ r  r  r   s    r2   r   ?QwenImageEditInpaintProcessImagesInputStep.intermediate_outputs  sH     .%,,Tij+,,6
 *[
 	
r4   r   r   c                    U R                  U5      nUR                  S   R                  u  pEUR                  R	                  UR                  UR
                  UUUR                  S9u  Ul        Ul        Ul	        U R                  X#5        X4$ )Nr   r  )r   r   rM   r  r  r  r  r  r  r  r   r   r   r   r   r   r   s         r2   r   3QwenImageEditInpaintProcessImagesInputStep.__call__  s    **51#11!499 ++66!// ++"-"?"? 7  	g#[%E{Gf 	U0  r4   r   Nr   r   r4   r2   r  r    s    0 "JOS O O 
T-%8 
 
 

Z( 

 

 
d;&7 
 
 ]]_!#; !M ! !r4   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\4S j5       rSrg)QwenImageProcessImagesInputStepi  a  
Image Preprocess step. will resize the image to the given height and width.

  Components:
      image_processor (`VaeImageProcessor`)

  Inputs:
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.

  Outputs:
      processed_image (`Tensor`):
          The processed image
r6  r   c                     g)NzKImage Preprocess step. will resize the image to the given height and width.r   r   s    r2   r   +QwenImageProcessImagesInputStep.description  s    \r4   c                 :    [        S[        [        SS05      SS9/$ Nimage_processorr   r   r   r   r   r   s    r2   r   3QwenImageProcessImagesInputStep.expected_components  -     !!!#5r":;(5	
 	
r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )Nrb   r   r   r   r   s    r2   r   &QwenImageProcessImagesInputStep.inputs  s8     ()(
 	
r4   c                 6    [        S[        R                  SS9/$ Nr  r  r   r   r+   r   r   s    r2   r   4QwenImageProcessImagesInputStep.intermediate_outputs  #     &,,1
 	
r4   c                     U b  XS-  -  S:w  a  [        SUS-   SU  35      eUb   XS-  -  S:w  a  [        SUS-   SU 35      eg g r  r   r  s      r2   r   ,QwenImageProcessImagesInputStep.check_inputs  r  r4   r   r   c                 z   U R                  U5      nU R                  UR                  UR                  UR                  S9  UR                  =(       d    UR
                  nUR                  =(       d    UR                  nUR                  R                  UR                  UUS9Ul
        U R                  X#5        X4$ )Nr  rb   r   r   )r   r   r   r   r   r  r  r  r  rb   r  r   r  s         r2   r   (QwenImageProcessImagesInputStep.__call__"  s    **51%%[->->Q[QlQl 	 	
 ##@z'@'@!!=Z%=%=&0&@&@&K&K## 'L '
# 	U0  r4   r   Nr  r   r4   r2   r  r    s    & J]S ] ] 
T-%8 
 
 
Z( 
 
 
d;&7 
 
 b b ]]_!#; !M ! !r4   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\4S j5       rSrg)#QwenImageEditProcessImagesInputStepi7  a/  
Image Preprocess step. Images needs to be resized first.

  Components:
      image_processor (`VaeImageProcessor`)

  Inputs:
      resized_image (`list`):
          The resized image. should be generated using a resize step

  Outputs:
      processed_image (`Tensor`):
          The processed image
r   r   c                     g)Nz8Image Preprocess step. Images needs to be resized first.r   r   s    r2   r   /QwenImageEditProcessImagesInputStep.descriptionI  s    Ir4   c                 :    [        S[        [        SS05      SS9/$ r  r   r   s    r2   r   7QwenImageEditProcessImagesInputStep.expected_componentsM  r  r4   c                 Z    [        SS[        [        R                  R                     SS9/$ Nr   Tr  r  r   rr   r   r   r   s    r2   r   *QwenImageEditProcessImagesInputStep.inputsX  /     $syy/X	
 	
r4   c                 6    [        S[        R                  SS9/$ r  r  r   s    r2   r   8QwenImageEditProcessImagesInputStep.intermediate_outputsc  r  r4   r   r   c                     U R                  U5      nUR                  S   R                  u  pEUR                  R	                  UR                  UUS9Ul        U R                  X#5        X4$ )Nr   r  )r   r   rM   r  r  r  r   r  s         r2   r   ,QwenImageEditProcessImagesInputStep.__call__m  sq    **51#11!499&0&@&@&K&K++ 'L '
# 	U0  r4   r   Nr   r   r4   r2   r  r  7  s     "JJS J J 
T-%8 
 
 
Z( 
 
 
d;&7 
 
 ]]_!#; !M ! !r4   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\4S j5       rSrg)'QwenImageEditPlusProcessImagesInputStepi~  as  
Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of
processed images.

  Components:
      image_processor (`VaeImageProcessor`)

  Inputs:
      resized_image (`list`):
          The resized image. should be generated using a resize step

  Outputs:
      processed_image (`Tensor`):
          The processed image
r   r   c                     g)Nz|Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of processed images.r   r   s    r2   r   3QwenImageEditPlusProcessImagesInputStep.description  s     Nr4   c                 :    [        S[        [        SS05      SS9/$ r  r   r   s    r2   r   ;QwenImageEditPlusProcessImagesInputStep.expected_components  r  r4   c                 Z    [        SS[        [        R                  R                     SS9/$ r  r  r   s    r2   r   .QwenImageEditPlusProcessImagesInputStep.inputs  r  r4   c                 6    [        S[        R                  SS9/$ r  r  r   s    r2   r   <QwenImageEditPlusProcessImagesInputStep.intermediate_outputs  r  r4   r   r   c           	      L   U R                  U5      nUR                  n[        U[        5      nU(       d  U/n/ nU H:  nUR                  u  pUR                  UR                  R                  XyUS95        M<     U(       a  Xcl        O
US   Ul        U R                  X#5        X4$ )Nr  r   )
r   r   rH   rr   rM   r  r  r  r  r   )
r   r   r   r   rb   is_image_listprocessed_imagesrv   	img_width
img_heights
             r2   r   0QwenImageEditPlusProcessImagesInputStep.__call__  s    **51))"5$/GEC$'HH!I##**55CZc5d  *:'*:1*=K'U0  r4   r   Nr   r   r4   r2   r  r  ~  s      'JNS N N 
T-%8 
 
 
Z( 
 
 
d;&7 
 
 ]]_!#; !M ! !r4   r  c                      ^  \ rS rSrSrSrSS\S-  S\S-  4U 4S jjjr\	S\
4S	 j5       r\	S\\   4S
 j5       r\	S\\   4S j5       r\	S\\   4S j5       r\R$                  " 5       S\S\S\4S j5       rSrU =r$ )QwenImageVaeEncoderStepi  a  
VAE Encoder step that converts processed_image into latent representations image_latents.
  Handles both single images and lists of images with varied resolutions.

  Components:
      vae (`AutoencoderKLQwenImage`)

  Inputs:
      processed_image (`Tensor`):
          The image tensor to encode
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.

  Outputs:
      image_latents (`Tensor`):
          The latent representation of the input image.
r6  Ninputoutputc                   > Uc  [        SS[        R                  SS9nUc  [        R                  " S5      n[        U[         5      (       d  [        S[        U5       35      e[        U[        5      (       d  [        S[        U5       35      eXl        X l	        UR                  U l        UR                  U l        [        TU ]9  5         g)	a  Initialize a VAE encoder step for converting images to latent representations.

Handles both single images and lists of images. When input is a list, outputs a list of latents. When input is
a single tensor, outputs a single latent tensor.

Args:
    input (InputParam, optional): Input parameter for the processed image. Defaults to "processed_image".
    output (OutputParam, optional): Output parameter for the image latents. Defaults to "image_latents".
Nr  TzThe image tensor to encoder  r   z input must be InputParam but is z"output must be OutputParam but is )r   r+   r   r   rU   rH   r   r   _input_outputr   _image_input_name_image_latents_output_namer  r  )r   r  r  r  s      r2   r   QwenImageVaeEncoderStep.__init__  s     =&[wE > ))/:F%,,?U}MNN&+..A$v,PQQ!&*0++'r4   r   c                 <    SU R                    SU R                   S3$ )NzVAE Encoder step that converts z into latent representations zI.
Handles both single images and lists of images with varied resolutions.)r  r  r   s    r2   r   #QwenImageVaeEncoderStep.description  s?     .d.D.D-EEbcg  dC  dC  cD DV V	
r4   c                 $    [        S[        5      /$ )Nr   )r   r   r   s    r2   r   +QwenImageVaeEncoderStep.expected_components  s    e%;<==r4   c                 F    U R                   [        R                  " S5      /$ )Nry   )r  r   rU   r   s    r2   r   QwenImageVaeEncoderStep.inputs  s#     KK,
 	
r4   c                     U R                   /$ r  )r  r   s    r2   r   ,QwenImageVaeEncoderStep.intermediate_outputs  s    ~r4   r   r   c                    U R                  U5      nUR                  nUR                  R                  n[	        X0R
                  5      n[        U[        5      nU(       d  U/n/ nU H>  n	UR                  [        U	UR                  UR                  UUUR                  S95        M@     U(       d  US   n[        X0R                  U5        U R                  X#5        X4$ )N)rb   r   ry   r:   rF   r   r   )r   r%  r   rF   getattrr  rH   rr   r  r   ry   num_channels_latentssetattrr  r   )
r   r   r   r   r:   rF   rb   r  r   rv   s
             r2   r    QwenImageVaeEncoderStep.__call__  s    **51--$$%;%;<"5$/GE C   ")33!$.$C$C	  )!,M<<mLU0  r4   )r  r  r  r  )NN)r   r   r   r   r   r   r   r   r  r   rI   r   rr   r   r   r   r   r+   r   r   r   r   r   r2  r3  s   @r2   r  r    s    $ Jj4/ d@R  : 
S 
 
 >T-%8 > > 
Z( 
 
 d;&7   ]]_!#; !M !Vc ! !r4   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\S\4S j5       rSrg)!QwenImageControlNetVaeEncoderStepiA  a  
VAE Encoder step that converts `control_image` into latent representations control_image_latents.

  Components:
      vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
      (`VaeImageProcessor`)

  Inputs:
      control_image (`Image`):
          Control image for ControlNet conditioning.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.

  Outputs:
      control_image_latents (`Tensor`):
          The latents representing the control image
r6  r   c                     g)NzbVAE Encoder step that converts `control_image` into latent representations control_image_latents.
r   r   s    r2   r   -QwenImageControlNetVaeEncoderStep.descriptionZ  s    tr4   c           
      z    [        S[        5      [        S[        5      [        S[        [	        SS05      SS9/nU$ )Nr   
controlnetcontrol_image_processorr   r   r   r   )r   r   r   r
   r   )r   r   s     r2   r   5QwenImageControlNetVaeEncoderStep.expected_components^  sH     %!78,(@A)!!#5r":;(5		

 r4   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /nU$ )Ncontrol_imager   r   ry   r   )r   r   s     r2   r   (QwenImageControlNetVaeEncoderStep.inputsl  sJ     0)(,	
 r4   c                 6    [        S[        R                  SS9/$ )Ncontrol_image_latentsz*The latents representing the control image)r   r   r  r   s    r2   r   6QwenImageControlNetVaeEncoderStep.intermediate_outputsv  s#     ',,H
 	
r4   c                     U b  XS-  -  S:w  a  [        SUS-   SU  35      eUb   XS-  -  S:w  a  [        SUS-   SU 35      eg g r  r   r  s      r2   r   .QwenImageControlNetVaeEncoderStep.check_inputs  r  r4   r   r   c                 R   U R                  U5      nU R                  UR                  UR                  UR                  5        UR
                  nUR                  R                  nUR                  =(       d    UR                  nUR                  =(       d    UR                  n[        UR                  5      n[        U[        5      (       a1  [        UR                  [        5      (       d  UR                  /Ul        [        U[        5      (       a~  / Ul        UR                   Hf  n	UR"                  R%                  U	UUS9n	['        U	UR                  UR(                  UUUR*                  SS9n
UR                   R-                  U
5        Mh     O[        U[.        5      (       aX  UR"                  R%                  UR                  UUS9n['        UUR                  UR(                  UUUR*                  SS9Ul        O[1        S[3        U5       35      eU R5                  X#5        X4$ )Nr  r}   )rb   r   ry   r:   rF   r   rz   z[Expected controlnet to be a QwenImageControlNetModel or QwenImageMultiControlNetModel, got )r   r   r   r   r   r%  r   rF   r  r  r   r  rH   r   r  rr   r  r   r  r   ry   r  r  r   r   r   r   )r   r   r   r   r:   rF   r   r   r  control_image_control_image_latents_r  s               r2   r   *QwenImageControlNetVaeEncoderStep.__call__  s    **51+,,k.?.?A\A\]--$$##@z'@'@!!=Z%=%=":#8#89
j"?@@T_TmTmosItIt)4)B)B(CK%j"?@@02K-"-";";!+!C!C!N!N(! "O " *:(")33!$.$C$C (*& 11889OP! #<$ 
$<==&>>II!// J M
 1A#NN%// * ? ?$1K- mnrs}n~m  A  	U0  r4   r   Nr  r   r4   r2   r  r  A  s    , JuS u u T-%8   Z(   
d;&7 
 
 b b ]]_:!#; :!M :!Vc :! :!r4   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\R                  " 5       S\S\4S	 j5       rS
rg)"QwenImageLayeredPermuteLatentsStepi  az  
Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing.

  Inputs:
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      image_latents (`Tensor`):
          The latent representation of the input image. (permuted from [B, C, 1, H, W] to [B, 1, C, H, W])
r   r   c                     g)NzRPermute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing.r   r   s    r2   r   .QwenImageLayeredPermuteLatentsStep.description  s    cr4   c                 0    [         R                  " S5      /$ )Nr   r   r   s    r2   r   )QwenImageLayeredPermuteLatentsStep.inputs  s     0
 	
r4   c                 .    [         R                  " SSS9/$ )Nr   z0permuted from [B, C, 1, H, W] to [B, 1, C, H, W])noterI  r   s    r2   r   7QwenImageLayeredPermuteLatentsStep.intermediate_outputs  s       7ij
 	
r4   r   c                     U R                  U5      nUR                  nUR                  SSSSS5      Ul        U R                  X#5        X4$ )Nr   r   r   r   r   )r   r   permuter   )r   r   r   r   r   s        r2   r   +QwenImageLayeredPermuteLatentsStep.__call__  sO    **51 ++$+OOAq!Q$B!U0  r4   r   N)r   r   r   r   r   r   r   rI   r   rr   r   r   r   r   r+   r   r   r   r   r   r4   r2   r  r    s    
 %JdS d d 
Z( 
 

 
d;&7 
 

 ]]_!- !M ! !r4   r  )Nr}   )r   r~   )Lr   r   r+   transformersr   r   r   configuration_utilsr   guidersr   r  r	   r
   r   r   modelsr   r   r   +pipelines.qwenimage.pipeline_qwenimage_editr   utilsr   utils.torch_utilsr   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r   prompt_templatesr   r   r   r   r   r    r!   r"   r#   
get_loggerr   loggerr   r3   rI   rr   r   r:   ra   rn   r   rw   	Generatorr   rF   r   r   r   r   r  r5  rW  rn  r~  r  r  r  r  r  r  r  r   r4   r2   <module>r'     sI     ] ] - - l l e e O  . C K K 6
 
 
 
		H	%%,, ell  #";,O $"&%1 $s)O%1  	%1
 '*%1 %1 LL4%1V #!%"@,T"&,1 $s)O,1 <<$	,1
  ,1 '*,1 LL4,1d #JN"E?,Y"&41 $s)O41 <<$syy		?@@4G	41
  41 41 '*41 LL441r `h
TLL
T-2__t-C
TY\
T( (<<(	( ( LL	(
 ;;( ( (DE!3 E!RX!!6 X!x]!"7 ]!Li!)> i!d}!4 }!Bw!#8 w!v@!'< @!Ri!-B i!ZX!1F X!xP!&; P!hC!*? C!NN!.C N!nh!3 h!XB!(= B!V(!)> (!r4   