
    
3jB                     ,   S SK Jr  S SKrSSKJr  SSKJrJr  SSKJ	r	  SSK
Jr  SS	KJrJr  SS
KJrJrJr  SSKJrJrJr  \R,                  " \5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      rg)    )AnyN   )
FrozenDict)InpaintProcessorVaeImageProcessor)AutoencoderKLQwenImage)logging   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )QwenImageLayeredPachifierQwenImageModularPipelineQwenImagePachifierc                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\S\4S j5       rSrg)QwenImageAfterDenoiseStep$   a%  
Step that unpack the latents from 3D tensor (batch_size, sequence_length, channels) into 5D tensor (batch_size,
channels, 1, height, width)

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      latents (`Tensor`):
          The latents to decode, can be generated in the denoise step.

  Outputs:
      latents (`Tensor`):
          The denoisedlatents unpacked to B, C, 1, H, W
	qwenimagereturnc                     g)NzStep that unpack the latents from 3D tensor (batch_size, sequence_length, channels) into 5D tensor (batch_size, channels, 1, height, width) selfs    h/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/qwenimage/decoders.pydescription%QwenImageAfterDenoiseStep.description;   s     ]    c                 &    [        S[        SS9/nU$ N	pachifierfrom_config)default_creation_method)r   r   r   
componentss     r   expected_components-QwenImageAfterDenoiseStep.expected_components?   s!     +'9S`a

 r    c           	          [         R                  " SSS9[         R                  " SSS9[        SS[        R                  SS9/$ )NheightTrequiredwidthlatentsz<The latents to decode, can be generated in the denoise step.namer-   	type_hintr   )r   templatetorchTensorr   s    r   inputs QwenImageAfterDenoiseStep.inputsG   sF     48$7,,Z		
 		
r    c                 6    [        S[        R                  SS9/$ )Nr/   z-The denoisedlatents unpacked to B, C, 1, H, Wr1   r2   r   )r   r4   r5   r   s    r   intermediate_outputs.QwenImageAfterDenoiseStep.intermediate_outputsT   s"     %,,Ds
 	
r    r'   statec                     U R                  U5      nUR                  nUR                  R                  UR                  UR
                  UR                  US9Ul        U R                  X#5        X4$ )N)vae_scale_factor)get_block_stater>   r#   unpack_latentsr/   r+   r.   set_block_state)r   r'   r<   block_stater>   s        r   __call__"QwenImageAfterDenoiseStep.__call__\   sr    **51%66(22AA!3!3[5F5FYi B 
 	U0  r    r   N__name__
__module____qualname____firstlineno____doc__
model_namepropertystrr   listr   r(   r   r6   r   r:   r4   no_gradr   r   rC   __static_attributes__r   r    r   r   r   $   s    ( J]S ] ] T-%8   

Z( 

 

 
d;&7 
 
 ]]_	!#; 	!M 	!Vc 	! 	!r    r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S\4S
 j5       rSrg) QwenImageLayeredAfterDenoiseStepj   ac  
Unpack latents from (B, seq, C*4) to (B, C, layers+1, H, W) after denoising.

  Components:
      pachifier (`QwenImageLayeredPachifier`)

  Inputs:
      latents (`Tensor`):
          The denoised latents to decode, can be generated in the denoise step.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      layers (`int`, *optional*, defaults to 4):
          Number of layers to extract from the image

  Outputs:
      latents (`Tensor`):
          Denoised latents. (unpacked to B, C, layers+1, H, W)
qwenimage-layeredr   c                     g)NzLUnpack latents from (B, seq, C*4) to (B, C, layers+1, H, W) after denoising.r   r   s    r   r   ,QwenImageLayeredAfterDenoiseStep.description   s    ]r    c                 "    [        S[        SS9/$ r"   )r   r   r   s    r   r(   4QwenImageLayeredAfterDenoiseStep.expected_components   s     +'@Zgh
 	
r    c                     [        SS[        R                  SS9[         R                  " SSS9[         R                  " SSS9[         R                  " S5      /$ )	Nr/   TzEThe denoised latents to decode, can be generated in the denoise step.r0   r+   r,   r.   layersr   r4   r5   r3   r   s    r   r6   'QwenImageLayeredAfterDenoiseStep.inputs   sV     ,,c	 48$7)

 
	
r    c                 .    [         R                  " SSS9/$ )Nr/   z unpacked to B, C, layers+1, H, Wnoter   r3   r   s    r   r:   5QwenImageLayeredAfterDenoiseStep.intermediate_outputs   s       1ST
 	
r    r<   c                     U R                  U5      nUR                  R                  UR                  UR                  UR
                  UR                  UR                  5      Ul        U R                  X#5        X4$ )N)	r?   r#   r@   r/   r+   r.   rZ   r>   rA   r   r'   r<   rB   s       r   rC   )QwenImageLayeredAfterDenoiseStep.__call__   ss    **51 )22AA''
 	U0  r    r   NrF   rG   rH   rI   rJ   rK   rL   rM   r   rN   r   r(   r   r6   r   r:   r4   rO   r   rC   rP   r   r    r   rR   rR   j   s    * %J^S ^ ^ 
T-%8 
 

 
Z( 
 
 
d;&7 
 

 ]]_!- !M ! !r    rR   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\S\4S j5       rSrg)QwenImageDecoderStep   af  
Step that decodes the latents to images

  Components:
      vae (`AutoencoderKLQwenImage`)

  Inputs:
      latents (`Tensor`):
          The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
          step.

  Outputs:
      images (`list`):
          Generated images. (tensor output of the vae decoder.)
r   r   c                     g)Nz'Step that decodes the latents to imagesr   r   s    r   r    QwenImageDecoderStep.description   s    8r    c                 (    [        S[        5      /nU$ )Nvae)r   r   r&   s     r   r(   (QwenImageDecoderStep.expected_components   s     %!78

 r    c                 8    [        SS[        R                  SS9/$ )Nr/   TlThe denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.r0   )r   r4   r5   r   s    r   r6   QwenImageDecoderStep.inputs   s)     ,, K	
 	
r    c                 .    [         R                  " SSS9/$ )Nimagesz!tensor output of the vae decoder.r^   r`   r   s    r   r:   )QwenImageDecoderStep.intermediate_outputs   s    $$X4WXYYr    r'   r<   c                    U R                  U5      nUR                  R                  S:X  a  UR                  R                  SS9Ul        O=UR                  R                  S:w  a#  [	        SUR                  R
                   S35      eUR                  R                  UR                  R                  5      Ul        [        R                  " UR                  R                  R                  5      R                  SUR                  R                  R                  SSS5      R                  UR                  R                  UR                  R                  5      nS[        R                  " UR                  R                  R                   5      R                  SUR                  R                  R                  SSS5      R                  UR                  R                  UR                  R                  5      -  nUR                  U-  U-   Ul        UR                  R#                  UR                  SS	9S
   S S 2S S 2S
4   Ul        U R'                  X#5        X4$ )N   r   )dim   z0expect latents to be a 4D or 5D tensor but got: z?. Please make sure the latents are unpacked before decode step.      ?Freturn_dictr   )r?   r/   ndim	unsqueeze
ValueErrorshapetorl   dtyper4   tensorconfiglatents_meanviewz_dimdevicelatents_stddecoderr   rA   )r   r'   r<   rB   r   r   s         r   rC   QwenImageDecoderStep.__call__   s   **51 ##q("-"5"5"?"?A"?"FK  %%*B;CVCVC\C\B]  ^]  ^  *1144Z^^5I5IJ LL..;;<T!Z^^**00!Q:R##**K,?,?,E,EF 	
 ELL)>)>)J)JKPPz~~$$**Aq!

"[  '')<)<)B)B
CD *11K?,N'^^22;3F3FTY2Z[\]^_abde^efU0  r    r   NrE   r   r    r   rg   rg      s      J9S 9 9 T-%8   
Z( 
 
 Zd;&7 Z Z ]]_!#; !M !Vc ! !r    rg   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S\4S
 j5       rSrg)QwenImageLayeredDecoderStep   a  
Decode unpacked latents (B, C, layers+1, H, W) into layer images.

  Components:
      vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

  Inputs:
      latents (`Tensor`):
          The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
          step.
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.

  Outputs:
      images (`list`):
          Generated images.
rT   r   c                     g)NzADecode unpacked latents (B, C, layers+1, H, W) into layer images.r   r   s    r   r   'QwenImageLayeredDecoderStep.description  s    Rr    c           	      X    [        S[        5      [        S[        [        SS05      SS9/$ )Nrl   image_processorr>      r$   r   r%   )r   r   r   r   r   s    r   r(   /QwenImageLayeredDecoderStep.expected_components  s8     %!78!!!#5r":;(5	
 	
r    c                 b    [        SS[        R                  SS9[         R                  " S5      /$ )Nr/   Tro   r0   output_typer[   r   s    r   r6   "QwenImageLayeredDecoderStep.inputs%  s9     ,, K	 .
 	
r    c                 0    [         R                  " S5      /$ Nrr   r`   r   s    r   r:   0QwenImageLayeredDecoderStep.intermediate_outputs1      $$X.//r    r<   c                 ~   U R                  U5      nUR                  nUR                  UR                  R                  5      n[
        R                  " UR                  R                  R                  5      R                  SUR                  R                  R                  SSS5      R                  UR                  UR                  5      nS[
        R                  " UR                  R                  R                  5      R                  SUR                  R                  R                  SSS5      R                  UR                  UR                  5      -  nXF-  U-   nUR                  u  pxpnUS S 2S S 2SS 24   nUR                  SSSSS5      R                  SUSX5      nUR                  R!                  USS	9S   nUR#                  S5      nUR$                  R'                  XR(                  S
9n/ n[+        U5       H  nUR-                  XU	-  US-   U	-   5        M!     Xl        U R1                  X#5        X4$ )Nr   rx   r   r
   r   ru   Fry   r   )r?   r/   r   rl   r   r4   r   r   r   r   r   r   r   r~   permutereshaper   squeezer   postprocessr   rangeappendrr   rA   )r   r'   r<   rB   r/   r   r   bcfhwimagerr   bidxs                  r   rC   $QwenImageLayeredDecoderStep.__call__5  s   **51%% **Z^^112LL..;;<T!Z^^**00!Q:R. 	
 ELL)>)>)J)JKPPz~~$$**Aq!

"W^^W]]
+, ',6  aA!Q(#//!Q1a088Q1H %%g5%A!Da  **66uJaJa6b !HDMM%qD1H>:;  $U0  r    r   Nre   r   r    r   r   r      s    $ %JSS S S 	
T-%8 	
 	
 	
Z( 	
 	
 0d;&7 0 0 ]]_&!- &!M &! &!r    r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\4S j5       rSrg) QwenImageProcessImagesOutputStepic  a[  
postprocess the generated image

  Components:
      image_processor (`VaeImageProcessor`)

  Inputs:
      images (`Tensor`):
          the generated image tensor from decoders step
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.

  Outputs:
      images (`list`):
          Generated images.
r   r   c                     g)Nzpostprocess the generated imager   r   s    r   r   ,QwenImageProcessImagesOutputStep.descriptionw  s    0r    c                 :    [        S[        [        SS05      SS9/$ )Nr   r>   r   r$   r   )r   r   r   r   s    r   r(   4QwenImageProcessImagesOutputStep.expected_components{  s-     !!!#5r":;(5	
 	
r    c                 b    [        SS[        R                  SS9[         R                  " S5      /$ )Nrr   T-the generated image tensor from decoders stepr0   r   r[   r   s    r   r6   'QwenImageProcessImagesOutputStep.inputs  s6     ,,K	 .
 	
r    c                 0    [         R                  " S5      /$ r   r`   r   s    r   r:   5QwenImageProcessImagesOutputStep.intermediate_outputs  r   r    c                 ,    U S;  a  [        SU  35      eg )NpilnpptInvalid output_type: r}   r   s    r   check_inputs-QwenImageProcessImagesOutputStep.check_inputs  s#    114[MBCC 2r    r'   r<   c                     U R                  U5      nU R                  UR                  5        UR                  R	                  UR
                  UR                  S9Ul        U R                  X#5        X4$ )N)r   r   )r?   r   r   r   r   rr   rA   rc   s       r   rC   )QwenImageProcessImagesOutputStep.__call__  sp    **51+112'77CC$$#// D 

 	U0  r    r   NrF   rG   rH   rI   rJ   rK   rL   rM   r   rN   r   r(   r   r6   r   r:   staticmethodr   r4   rO   r   r   rC   rP   r   r    r   r   r   c  s    " J1S 1 1 
T-%8 
 
 	
Z( 	
 	
 0d;&7 0 0 D D ]]_!#; !M ! !r    r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\4S j5       rSrg)'QwenImageInpaintProcessImagesOutputStepi  aH  
postprocess the generated image, optional apply the mask overally to the original image..

  Components:
      image_mask_processor (`InpaintProcessor`)

  Inputs:
      images (`Tensor`):
          the generated image tensor from decoders step
      output_type (`str`, *optional*, defaults to pil):
          Output format: 'pil', 'np', 'pt'.
      mask_overlay_kwargs (`dict`, *optional*):
          The kwargs for the postprocess step to apply the mask overlay. generated in
          InpaintProcessImagesInputStep.

  Outputs:
      images (`list`):
          Generated images.
r   r   c                     g)NzYpostprocess the generated image, optional apply the mask overally to the original image..r   r   s    r   r   3QwenImageInpaintProcessImagesOutputStep.description  s    jr    c                 :    [        S[        [        SS05      SS9/$ )Nimage_mask_processorr>   r   r$   r   )r   r   r   r   s    r   r(   ;QwenImageInpaintProcessImagesOutputStep.expected_components  s-     & !#5r":;(5	
 	
r    c                     [        SS[        R                  SS9[         R                  " S5      [        S[        [
        [        4   SS9/$ )	Nrr   Tr   r0   r   mask_overlay_kwargszjThe kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.r9   )r   r4   r5   r3   dictrM   r   r   s    r   r6   .QwenImageInpaintProcessImagesOutputStep.inputs  sR     ,,K	 .*sCx. I
 	
r    c                 0    [         R                  " S5      /$ r   r`   r   s    r   r:   <QwenImageInpaintProcessImagesOutputStep.intermediate_outputs  r   r    c                 ^    U S;  a  [        SU  35      eU(       a  U S:w  a  [        S5      eg g )Nr   r   r   z/only support output_type 'pil' for mask overlayr   )r   r   s     r   r   4QwenImageInpaintProcessImagesOutputStep.check_inputs  s<    114[MBCC;%#7NOO $8r    r'   r<   c                 *   U R                  U5      nU R                  UR                  UR                  5        UR                  c  0 nOUR                  nUR                  R
                  " SSUR                  0UD6Ul        U R                  X#5        X4$ )Nr   r   )r?   r   r   r   r   r   rr   rA   )r   r'   r<   rB   r   s        r   rC   0QwenImageInpaintProcessImagesOutputStep.__call__  s    **51+11;3R3RS**2"$"-"A"A'<<HH 
$$
!

 	U0  r    r   Nr   r   r    r   r   r     s    ( JkS k k 
T-%8 
 
 
Z( 
 
  0d;&7 0 0 P P ]]_!#; !M ! !r    r   )typingr   r4   configuration_utilsr   r   r   r   modelsr   utilsr	   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r   r   r   
get_loggerrF   loggerr   rR   rg   r   r   r   r   r    r   <module>r      s       - B ,  C K K e e 
		H	%B! 5 B!LD!'< D!VG!0 G!V\!"7 \!FD!'< D!PT!.C T!r    