
    
3j              	          S SK r SSKJr  SSKJrJrJr  SSKJrJ	r	J
r
  SSKJrJrJrJrJrJrJr  SS	KJrJrJrJr  SS
KJrJrJrJr  SSKJrJrJ r J!r!J"r"  SSK#J$r$J%r%J&r&  \RN                  " \(5      r) " S S\5      r* " S S\5      r+ " S S\5      r, " S S\5      r- " S S\5      r. " S S\5      r/ " S S\5      r0 " S S\5      r1 " S S\5      r2 " S S \5      r3 " S! S"\5      r4 " S# S$\5      r5 " S% S&\5      r6 " S' S(\5      r7 " S) S*\5      r8 " S+ S,\5      r9 " S- S.\5      r: " S/ S0\5      r;\	" S1\*" 5       4S2\-" 5       4S3\." 5       4S4\8" 5       4S5\;" 5       4/5      r< " S6 S7\5      r=g)8    N   )logging   )AutoPipelineBlocksConditionalPipelineBlocksSequentialPipelineBlocks)
InputParamInsertableDictOutputParam   )%QwenImageControlNetBeforeDenoiserStepQwenImageCreateMaskLatentsStepQwenImagePrepareLatentsStep'QwenImagePrepareLatentsWithStrengthStepQwenImageRoPEInputsStepQwenImageSetTimestepsStep%QwenImageSetTimestepsWithStrengthStep)QwenImageAfterDenoiseStepQwenImageDecoderStep'QwenImageInpaintProcessImagesOutputStep QwenImageProcessImagesOutputStep)QwenImageControlNetDenoiseStepQwenImageDenoiseStep%QwenImageInpaintControlNetDenoiseStepQwenImageInpaintDenoiseStep)!QwenImageControlNetVaeEncoderStep&QwenImageInpaintProcessImagesInputStepQwenImageProcessImagesInputStepQwenImageTextEncoderStepQwenImageVaeEncoderStep)QwenImageAdditionalInputsStepQwenImageControlNetInputsStepQwenImageTextInputsStepc                   P    \ rS rSrSrSr\" 5       /rS/rS/r	\
S\4S j5       rSrg	)
QwenImageAutoTextEncoderStep@   a  
Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
      The tokenizer to use guider (`ClassifierFreeGuidance`)

  Inputs:
      prompt (`str`, *optional*):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      max_sequence_length (`int`, *optional*, defaults to 1024):
          Maximum sequence length for prompt encoding.

  Outputs:
      prompt_embeds (`Tensor`):
          The prompt embeddings.
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask.
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings.
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask.
	qwenimagetext_encoderpromptreturnc                     g)NzeText encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block. selfs    x/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.pydescription(QwenImageAutoTextEncoderStep.description`   s    v    r,   N)__name__
__module____qualname____firstlineno____doc__
model_namer   block_classesblock_namesblock_trigger_inputspropertystrr0   __static_attributes__r,   r2   r/   r%   r%   @   sE    4 J-/0M!"K$:@S @ @r2   r%   c                   X    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S\4S j5       rSrg	)
QwenImageInpaintVaeEncoderStepm   a  
This step is used for processing image and mask inputs for inpainting tasks. It:
   - Resizes the image to the target size, based on `height` and `width`.
   - Processes and updates `image` and `mask_image`.
   - Creates `image_latents`.

  Components:
      image_mask_processor (`InpaintProcessor`) vae (`AutoencoderKLQwenImage`)

  Inputs:
      mask_image (`Image`):
          Mask image for inpainting.
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      padding_mask_crop (`int`, *optional*):
          Padding for mask cropping in inpainting.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.

  Outputs:
      processed_image (`Tensor`):
          The processed image
      processed_mask_image (`Tensor`):
          The processed mask image
      mask_overlay_kwargs (`dict`):
          The kwargs for the postprocess step to apply the mask overlay
      image_latents (`Tensor`):
          The latent representation of the input image.
r'   
preprocessencoder*   c                      g)NzThis step is used for processing image and mask inputs for inpainting tasks. It:
 - Resizes the image to the target size, based on `height` and `width`.
 - Processes and updates `image` and `mask_image`.
 - Creates `image_latents`.r,   r-   s    r/   r0   *QwenImageInpaintVaeEncoderStep.description   s    *	
r2   r,   N)r3   r4   r5   r6   r7   r8   r   r    r9   r:   r<   r=   r0   r>   r,   r2   r/   r@   r@   m   sC     D J;=?V?XYM*K
S 
 
r2   r@   c                   X    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S\4S j5       rSrg	)
QwenImageImg2ImgVaeEncoderStep   a  
Vae encoder step that preprocess andencode the image inputs into their latent representations.

  Components:
      image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`)

  Inputs:
      image (`Image | list`):
          Reference image(s) for denoising. Can be a single image or list of images.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.

  Outputs:
      processed_image (`Tensor`):
          The processed image
      image_latents (`Tensor`):
          The latent representation of the input image.
r'   rB   rC   r*   c                     g)Nz^Vae encoder step that preprocess andencode the image inputs into their latent representations.r,   r-   s    r/   r0   *QwenImageImg2ImgVaeEncoderStep.description   s    or2   r,   N)r3   r4   r5   r6   r7   r8   r   r    r9   r:   r<   r=   r0   r>   r,   r2   r/   rG   rG      sE    . J468O8QRM*KpS p pr2   rG   c                   <    \ rS rSr\\/rSS/rSS/r\	S 5       r
Srg)	QwenImageAutoVaeEncoderStep   inpaintimg2img
mask_imageimagec                      g)NaZ  Vae encoder step that encode the image inputs into their latent representations.
This is an auto pipeline block.
 - `QwenImageInpaintVaeEncoderStep` (inpaint) is used when `mask_image` is provided.
 - `QwenImageImg2ImgVaeEncoderStep` (img2img) is used when `image` is provided.
 - if `mask_image` or `image` is not provided, step will be skipped.r,   r-   s    r/   r0   'QwenImageAutoVaeEncoderStep.description   s    U	
r2   r,   N)r3   r4   r5   r6   r@   rG   r9   r:   r;   r<   r0   r>   r,   r2   r/   rL   rL      s4    35STMi(K('2
 
r2   rL   c                   :    \ rS rSrSr\/rS/rS/r\	S 5       r
Srg))QwenImageOptionalControlNetVaeEncoderStep   a  
Vae encoder step that encode the image inputs into their latent representations.
  This is an auto pipeline block.
   - `QwenImageControlNetVaeEncoderStep` (controlnet) is used when `control_image` is provided.
   - if `control_image` is not provided, step will be skipped.

  Components:
      vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
      (`VaeImageProcessor`)

  Inputs:
      control_image (`Image`, *optional*):
          Control image for ControlNet conditioning.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.

  Outputs:
      control_image_latents (`Tensor`):
          The latents representing the control image

controlnetcontrol_imagec                      g)Na  Vae encoder step that encode the image inputs into their latent representations.
This is an auto pipeline block.
 - `QwenImageControlNetVaeEncoderStep` (controlnet) is used when `control_image` is provided.
 - if `control_image` is not provided, step will be skipped.r,   r-   s    r/   r0   5QwenImageOptionalControlNetVaeEncoderStep.description   s    M	
r2   r,   N)r3   r4   r5   r6   r7   r   r9   r:   r;   r<   r0   r>   r,   r2   r/   rU   rU      s3    2 77M.K+,
 
r2   rU   c                   P    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S 5       rSrg)	QwenImageImg2ImgInputStepi  a  
Input step that prepares the inputs for the img2img denoising step. It:

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      batch_size (`int`):
          The batch size of the prompt embeddings
      dtype (`dtype`):
          The data type of the prompt embeddings
      prompt_embeds (`Tensor`):
          The prompt embeddings. (batch-expanded)
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask. (batch-expanded)
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings. (batch-expanded)
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask. (batch-expanded)
      image_height (`int`):
          The image height calculated from the image latents dimension
      image_width (`int`):
          The image width calculated from the image latents dimension
      height (`int`):
          if not provided, updated to image height
      width (`int`):
          if not provided, updated to image width
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified and
          batch-expanded)
r'   text_inputsadditional_inputsc                     g)NzHInput step that prepares the inputs for the img2img denoising step. It:
r,   r-   s    r/   r0   %QwenImageImg2ImgInputStep.description9  s    Zr2   r,   N)r3   r4   r5   r6   r7   r8   r#   r!   r9   r:   r<   r0   r>   r,   r2   r/   r\   r\     s?    0d J,.0M0OPM "56KR Rr2   r\   c            	       p    \ rS rSrSrSr\" 5       \" \" S\	R                  SS9/S9/rSS	/r\S
 5       rSrg)QwenImageInpaintInputStepiA  aM	  
Input step that prepares the inputs for the inpainting denoising step. It:

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      processed_mask_image (`Tensor`, *optional*):
          The processed mask image

  Outputs:
      batch_size (`int`):
          The batch size of the prompt embeddings
      dtype (`dtype`):
          The data type of the prompt embeddings
      prompt_embeds (`Tensor`):
          The prompt embeddings. (batch-expanded)
      prompt_embeds_mask (`Tensor`):
          The encoder attention mask. (batch-expanded)
      negative_prompt_embeds (`Tensor`):
          The negative prompt embeddings. (batch-expanded)
      negative_prompt_embeds_mask (`Tensor`):
          The negative prompt embeddings mask. (batch-expanded)
      image_height (`int`):
          The image height calculated from the image latents dimension
      image_width (`int`):
          The image width calculated from the image latents dimension
      height (`int`):
          if not provided, updated to image height
      width (`int`):
          if not provided, updated to image width
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified and
          batch-expanded)
      processed_mask_image (`Tensor`):
          The processed mask image (batch-expanded)
r'   processed_mask_imagezThe processed mask image)name	type_hintr0   )additional_batch_inputsr]   r^   c                     g)NzKInput step that prepares the inputs for the inpainting denoising step. It:
r,   r-   s    r/   r0   %QwenImageInpaintInputStep.description  s    ]r2   r,   N)r3   r4   r5   r6   r7   r8   r#   r!   r	   torchTensorr9   r:   r<   r0   r>   r,   r2   r/   rb   rb   A  s\    4l J!% 6%,,\vw%	
M !"56KR Rr2   rb   c                   X    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S\4S j5       rSrg	)
"QwenImageInpaintPrepareLatentsStepi  a  
This step prepares the latents/image_latents and mask inputs for the inpainting denoising step. It:
   - Add noise to the image latents to create the latents input for the denoiser.
   - Create the pachified latents `mask` based on the processedmask image.

  Components:
      scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)

  Inputs:
      latents (`Tensor`):
          The initial random noised, can be generated in prepare latent step.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
          generated from vae encoder and updated in input step.)
      timesteps (`Tensor`):
          The timesteps to use for the denoising process. Can be generated in set_timesteps step.
      processed_mask_image (`Tensor`):
          The processed mask to use for the inpainting process.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      dtype (`dtype`, *optional*, defaults to torch.float32):
          The dtype of the model inputs, can be generated in input step.

  Outputs:
      initial_noise (`Tensor`):
          The initial random noised used for inpainting denoising.
      latents (`Tensor`):
          The scaled noisy latents to use for inpainting/image-to-image denoising.
      mask (`Tensor`):
          The mask to use for the inpainting process.
r'   add_noise_to_latentscreate_mask_latentsr*   c                      g)NzThis step prepares the latents/image_latents and mask inputs for the inpainting denoising step. It:
 - Add noise to the image latents to create the latents input for the denoiser.
 - Create the pachified latents `mask` based on the processedmask image.
r,   r-   s    r/   r0   .QwenImageInpaintPrepareLatentsStep.description  s    Y	
r2   r,   N)r3   r4   r5   r6   r7   r8   r   r   r9   r:   r<   r=   r0   r>   r,   r2   r/   rl   rl     sD     D J<>@^@`aM)+@AK
S 
 
r2   rl   c                       \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       /r/ SQr\S 5       r\S 5       rSrg)	QwenImageCoreDenoiseStepi  a  
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs
(timesteps, latents, rope inputs etc.).

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
      (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )inputprepare_latentsset_timestepsprepare_rope_inputsdenoiseafter_denoisec                     gNzstep that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).r,   r-   s    r/   r0   $QwenImageCoreDenoiseStep.description       mr2   c                 0    [         R                  " S5      /$ Nlatentsr   templater-   s    r/   outputs QwenImageCoreDenoiseStep.outputs         +
 	
r2   r,   N)r3   r4   r5   r6   r7   r8   r#   r   r   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   rr   rr     sk    'R J!#%!#!!#MK m m 
 
r2   rr   c                       \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       \" 5       /r/ SQr\S 5       r\S 5       rSrg)	QwenImageInpaintCoreDenoiseStepi
  a  
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint
task.

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
      (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      processed_mask_image (`Tensor`, *optional*):
          The processed mask image
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )rs   rt   ru   prepare_inpaint_latentsrv   rw   rx   c                     gNzyBefore denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.r,   r-   s    r/   r0   +QwenImageInpaintCoreDenoiseStep.descriptionN       Kr2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   'QwenImageInpaintCoreDenoiseStep.outputsR  r   r2   r,   N)r3   r4   r5   r6   r7   r8   rb   r   r   rl   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   r   r   
  sq    -^ J!##%-/*,!#%!#MK K K 
 
r2   r   c                       \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       \" 5       /r/ SQr\S 5       r\S 5       rSrg)	QwenImageImg2ImgCoreDenoiseStepi[  aC  
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img
task.

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
      (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )rs   rt   ru   prepare_img2img_latentsrv   rw   rx   c                     gNzyBefore denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.r,   r-   s    r/   r0   +QwenImageImg2ImgCoreDenoiseStep.description  r   r2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   'QwenImageImg2ImgCoreDenoiseStep.outputs  r   r2   r,   N)r3   r4   r5   r6   r7   r8   r\   r   r   r   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   r   r   [  sq    +Z J!##%-//1!!#MK K K 
 
r2   r   c            	           \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       \" 5       \" 5       /r/ SQr\S 5       r\S 5       rSrg)	"QwenImageControlNetCoreDenoiseStepi  a	  
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs
(timesteps, latents, rope inputs etc.).

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
      (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      control_image_latents (`Tensor`):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      control_guidance_start (`float`, *optional*, defaults to 0.0):
          When to start applying ControlNet.
      control_guidance_end (`float`, *optional*, defaults to 1.0):
          When to stop applying ControlNet.
      controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
          Scale for ControlNet conditioning.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )rs   controlnet_inputrt   ru   rv   controlnet_before_denoisecontrolnet_denoiserx   c                     grz   r,   r-   s    r/   r0   .QwenImageControlNetCoreDenoiseStep.description  r|   r2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   *QwenImageControlNetCoreDenoiseStep.outputs  r   r2   r,   N)r3   r4   r5   r6   r7   r8   r#   r"   r   r   r   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   r   r     sw    0d J!%'#%!#!-/&(!#	M	K m m 
 
r2   r   c            
           \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       \" 5       \" 5       \" 5       /	r/ SQr\S 5       r\S 5       rSrg)	)QwenImageControlNetInpaintCoreDenoiseStepi   a
  
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint
task.

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
      (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      processed_mask_image (`Tensor`, *optional*):
          The processed mask image
      control_image_latents (`Tensor`):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.
      control_guidance_start (`float`, *optional*, defaults to 0.0):
          When to start applying ControlNet.
      control_guidance_end (`float`, *optional*, defaults to 1.0):
          When to stop applying ControlNet.
      controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
          Scale for ControlNet conditioning.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )	rs   r   rt   ru   r   rv   r   r   rx   c                     gr   r,   r-   s    r/   r0   5QwenImageControlNetInpaintCoreDenoiseStep.descriptionQ  r   r2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   1QwenImageControlNetInpaintCoreDenoiseStep.outputsU  r   r2   r,   N)r3   r4   r5   r6   r7   r8   rb   r"   r   r   rl   r   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   r   r      s}    6p J!#%'#%-/*,!-/-/!#
M
K K K 
 
r2   r   c            
           \ rS rSrSrSr\" 5       \" 5       \" 5       \	" 5       \
" 5       \" 5       \" 5       \" 5       \" 5       /	r/ SQr\S 5       r\S 5       rSrg)	)QwenImageControlNetImg2ImgCoreDenoiseStepi^  an
  
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img
task.

  Components:
      pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
      (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      control_image_latents (`Tensor`):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.
      control_guidance_start (`float`, *optional*, defaults to 0.0):
          When to start applying ControlNet.
      control_guidance_end (`float`, *optional*, defaults to 1.0):
          When to stop applying ControlNet.
      controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
          Scale for ControlNet conditioning.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:
      latents (`Tensor`):
          Denoised latents.
r'   )	rs   r   rt   ru   r   rv   r   r   rx   c                     gr   r,   r-   s    r/   r0   5QwenImageControlNetImg2ImgCoreDenoiseStep.description  r   r2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   1QwenImageControlNetImg2ImgCoreDenoiseStep.outputs  r   r2   r,   N)r3   r4   r5   r6   r7   r8   r\   r"   r   r   r   r   r   r   r   r9   r:   r<   r0   r   r>   r,   r2   r/   r   r   ^  s}    4l J!#%'#%-//1!-/&(!#
M
K K K 
 
r2   r   c                   b    \ rS rSr\\\\\\	/r
/ SQr/ SQrSrS
S jr\S 5       r\S 5       rS	rg)QwenImageAutoCoreDenoiseStepi  )
text2imagerN   rO   controlnet_text2imagecontrolnet_inpaintcontrolnet_img2img)control_image_latentsrc   image_latentsr   Nc                 ,    Ub	  Ub  gUb  ggUb  gUb  gg)Nr   r   r   rN   rO   r   r,   )r.   r   rc   r   s       r/   select_block)QwenImageAutoCoreDenoiseStep.select_block  s2     ,#/+*+.#/ * #r2   c                      g)Na  Core step that performs the denoising process. 
 - `QwenImageCoreDenoiseStep` (text2image) for text2image tasks.
 - `QwenImageInpaintCoreDenoiseStep` (inpaint) for inpaint tasks.
 - `QwenImageImg2ImgCoreDenoiseStep` (img2img) for img2img tasks.
 - `QwenImageControlNetCoreDenoiseStep` (controlnet_text2image) for text2image tasks with controlnet.
 - `QwenImageControlNetInpaintCoreDenoiseStep` (controlnet_inpaint) for inpaint tasks with controlnet.
 - `QwenImageControlNetImg2ImgCoreDenoiseStep` (controlnet_img2img) for img2img tasks with controlnet.
This step support text-to-image, image-to-image, inpainting, and controlnet tasks for QwenImage:
 - for image-to-image generation, you need to provide `image_latents`
 - for inpainting, you need to provide `processed_mask_image` and `image_latents`
 - to run the controlnet workflow, you need to provide `control_image_latents`
 - for text-to-image generation, all you need to provide is prompt embeddingsr,   r-   s    r/   r0   (QwenImageAutoCoreDenoiseStep.description  s    ^	
r2   c                 0    [         R                  " S5      /$ r~   r   r-   s    r/   r   $QwenImageAutoCoreDenoiseStep.outputs  r   r2   r,   )NNN)r3   r4   r5   r6   rr   r   r   r   r   r   r9   r:   r;   default_block_namer   r<   r0   r   r>   r,   r2   r/   r   r     s^     ''*11MK ^%$  
 
  
 
r2   r   c                   P    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S 5       rSrg)	QwenImageDecodeStepi  a  
Decode step that decodes the latents to images and postprocess the generated image.

  Components:
      vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

  Inputs:
      latents (`Tensor`):
          The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
          step.
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.

  Outputs:
      images (`list`):
          Generated images. (tensor output of the vae decoder.)
r'   decodepostprocessc                     g)NzSDecode step that decodes the latents to images and postprocess the generated image.r,   r-   s    r/   r0   QwenImageDecodeStep.description  s    dr2   r,   N)r3   r4   r5   r6   r7   r8   r   r   r9   r:   r<   r0   r>   r,   r2   r/   r   r     s=    $ J)+-M-OPM]+Ke er2   r   c                   P    \ rS rSrSrSr\" 5       \" 5       /rSS/r	\
S 5       rSrg)	QwenImageInpaintDecodeStepi  a  
Decode step that decodes the latents to images and postprocess the generated image, optional apply the mask
overally to the original image.

  Components:
      vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)

  Inputs:
      latents (`Tensor`):
          The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
          step.
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.
      mask_overlay_kwargs (`dict`, *optional*):
          The kwargs for the postprocess step to apply the mask overlay. generated in
          InpaintProcessImagesInputStep.

  Outputs:
      images (`list`):
          Generated images. (tensor output of the vae decoder.)
r'   r   r   c                     g)NzDecode step that decodes the latents to images and postprocess the generated image, optional apply the mask overally to the original image.r,   r-   s    r/   r0   &QwenImageInpaintDecodeStep.description5  s     ]r2   r,   N)r3   r4   r5   r6   r7   r8   r   r   r9   r:   r<   r0   r>   r,   r2   r/   r   r     s=    , J)+-T-VWM]+K] ]r2   r   c                   <    \ rS rSr\\/rSS/rSS/r\	S 5       r
Srg)QwenImageAutoDecodeStepi;  inpaint_decoder   maskNc                      g)NaH  Decode step that decode the latents into images. 
 This is an auto pipeline block that works for inpaint/text2image/img2img tasks, for both QwenImage and QwenImage-Edit.
 - `QwenImageInpaintDecodeStep` (inpaint) is used when `mask` is provided.
 - `QwenImageDecodeStep` (text2image/img2img) is used when `mask` is not provided.
r,   r-   s    r/   r0   #QwenImageAutoDecodeStep.description@  s    e	
r2   r,   )r3   r4   r5   r6   r   r   r9   r:   r;   r<   r0   r>   r,   r2   r/   r   r   ;  s3    /1DEM#X.K"D>
 
r2   r   r(   vae_encodercontrolnet_vae_encoderrw   r   c            
           \ rS rSrSrSr\R                  5       r\R                  5       r
SS0SSS.SSSS.SSS.SSSS	.SSSSS
.S.r\S 5       r\S 5       rSrg)QwenImageAutoBlocksiY  aE  
Auto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.

  Supported workflows:
    - `text2image`: requires `prompt`
    - `image2image`: requires `prompt`, `image`
    - `inpainting`: requires `prompt`, `mask_image`, `image`
    - `controlnet_text2image`: requires `prompt`, `control_image`
    - `controlnet_image2image`: requires `prompt`, `image`, `control_image`
    - `controlnet_inpainting`: requires `prompt`, `mask_image`, `image`, `control_image`

  Components:
      text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
      The tokenizer to use guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
      (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) controlnet (`QwenImageControlNetModel`)
      control_image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
      (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)

  Inputs:
      prompt (`str`, *optional*):
          The prompt or prompts to guide image generation.
      negative_prompt (`str`, *optional*):
          The prompt or prompts not to guide the image generation.
      max_sequence_length (`int`, *optional*, defaults to 1024):
          Maximum sequence length for prompt encoding.
      mask_image (`Image`, *optional*):
          Mask image for inpainting.
      image (`Image | list`, *optional*):
          Reference image(s) for denoising. Can be a single image or list of images.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      padding_mask_crop (`int`, *optional*):
          Padding for mask cropping in inpainting.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      control_image (`Image`, *optional*):
          Control image for ControlNet conditioning.
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      prompt_embeds (`Tensor`):
          text embeddings used to guide the image generation. Can be generated from text_encoder step.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds (`Tensor`, *optional*):
          negative text embeddings used to guide the image generation. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.
      latents (`Tensor`):
          Pre-generated noisy latents for image generation.
      num_inference_steps (`int`):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      attention_kwargs (`dict`, *optional*):
          Additional kwargs for attention processors.
      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
      image_latents (`Tensor`, *optional*):
          image latents used to guide the image generation. Can be generated from vae_encoder step.
      processed_mask_image (`Tensor`, *optional*):
          The processed mask image
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.
      control_image_latents (`Tensor`, *optional*):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      control_guidance_start (`float`, *optional*, defaults to 0.0):
          When to start applying ControlNet.
      control_guidance_end (`float`, *optional*, defaults to 1.0):
          When to stop applying ControlNet.
      controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
          Scale for ControlNet conditioning.
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.
      mask_overlay_kwargs (`dict`, *optional*):
          The kwargs for the postprocess step to apply the mask overlay. generated in
          InpaintProcessImagesInputStep.

  Outputs:
      images (`list`):
          Generated images.
r'   r)   T)r)   rQ   )r)   rP   rQ   )r)   rX   )r)   rQ   rX   )r)   rP   rQ   rX   )r   image2image
inpaintingr   controlnet_image2imagecontrolnet_inpaintingc                     g)NzjAuto Modular pipeline for text-to-image, image-to-image, inpainting, and controlnet tasks using QwenImage.r,   r-   s    r/   r0   QwenImageAutoBlocks.description  s    {r2   c                 0    [         R                  " S5      /$ )Nimagesr   r-   s    r/   r   QwenImageAutoBlocks.outputs  s    $$X.//r2   r,   N)r3   r4   r5   r6   r7   r8   AUTO_BLOCKSvaluesr9   keysr:   _workflow_mapr<   r0   r   r>   r,   r2   r/   r   r   Y  s    Sj J&&(M""$K  &"&6!%TDI,04!H-1DSW"X,0tfj!kM | | 0 0r2   r   )>ri   utilsr   modular_pipeliner   r   r   modular_pipeline_utilsr	   r
   r   before_denoiser   r   r   r   r   r   r   decodersr   r   r   r   rw   r   r   r   r   encodersr   r   r   r   r    inputsr!   r"   r#   
get_loggerr3   loggerr%   r@   rG   rL   rU   r\   rb   rl   rr   r   r   r   r   r   r   r   r   r   r   r   r,   r2   r/   <module>r      s     f f L L       
		H	%$@#5 $@Z.
%= .
dp%= pD
"4 
$%
0B %
^;R 8 ;R~FR 8 FRV-
)A -
jD
7 D
RL
&> L
bJ
&> J
^Q
)A Q
lY
0H Y
|W
0H W
v9
#< 9
Fe2 e<]!9 ]B
0 
$ 	578	356	!#L#NO	023	*,-o02 o0r2   