
    
3j}              
          S SK r S SKrS SKrSSKJrJr  SSKJr  SSK	J
r
Jr  SSKJrJr  SSKJrJrJr  S	S
KJrJrJr      S.S\S\S\S\4S jjr    S/S\S-  S\\R4                  -  S-  S\\   S-  S\\   S-  4S jjrS r " S S\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r" " S  S!\5      r# " S" S#\5      r$ " S$ S%\5      r% " S& S'\5      r& " S( S)\5      r' " S* S+\5      r( " S, S-\5      r)g)0    N   )QwenImageControlNetModelQwenImageMultiControlNetModel)FlowMatchEulerDiscreteScheduler)randn_tensorunwrap_module   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )QwenImageLayeredPachifierQwenImageModularPipelineQwenImagePachifierbase_seq_lenmax_seq_len
base_shift	max_shiftc                 4    XC-
  X!-
  -  nX5U-  -
  nX-  U-   nU$ )N )image_seq_lenr   r   r   r   mbmus           n/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/modular_pipelines/qwenimage/before_denoise.pycalculate_shiftr      s3     
	K$>?A%%A		Q	BI    num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr"   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r"   r!   r#   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r#   r!   r!   r   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r"   len)	schedulerr    r!   r"   r#   kwargsaccepts_timestepsaccept_sigmass           r   retrieve_timestepsr2   +   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r   c                     [        X-  U5      n[        [        X-
  S5      5      nU R                  X@R                  -  S  n[        U S5      (       a  U R                  X@R                  -  5        XQU-
  4$ )Nr   set_begin_index)minintmaxr"   orderhasattrr4   )r.   r    strengthinit_timestept_startr"   s         r   get_timestepsr=   g   ss    +68KLM#)91=>G##Goo$=$?@Iy+,,!!'OO";<G333r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\S\4S j5       rSrg)QwenImagePrepareLatentsStepy   a  
Prepare initial random noise for the generation process

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      dtype (`dtype`, *optional*, defaults to torch.float32):
          The dtype of the model inputs, can be generated in input step.

  Outputs:
      height (`int`):
          if not set, updated to default value
      width (`int`):
          if not set, updated to default value
      latents (`Tensor`):
          The initial latents to use for the denoising process
	qwenimagereturnc                     g)Nz7Prepare initial random noise for the generation processr   selfs    r   description'QwenImagePrepareLatentsStep.description   s    Hr   c                 "    [        S[        SS9/$ N	pachifierfrom_config)default_creation_methodr   r   rD   s    r   expected_components/QwenImagePrepareLatentsStep.expected_components        +'9S`a
 	
r   c           	      ,   [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )Nlatentsheightwidthnum_images_per_prompt	generator
batch_sizedtyper   templaterD   s    r   inputs"QwenImagePrepareLatentsStep.inputs   sq     	*)( 78,-(
 	
r   c                 n    [        S[        SS9[        S[        SS9[        S[        R                  SS9/$ NrS   z$if not set, updated to default valuename	type_hintrF   rT   rR   z4The initial latents to use for the denoising processr   r6   torchTensorrD   s    r   intermediate_outputs0QwenImagePrepareLatentsStep.intermediate_outputs   =     XBhiWAgh,,R
 	
r   c                     U b  XS-  -  S:w  a  [        SUS-   SU  35      eUb   XS-  -  S:w  a  [        SUS-   SU 35      eg g Nr	   r   zHeight must be divisible by z but is zWidth must be divisible by r%   rS   rT   vae_scale_factors      r   check_inputs(QwenImagePrepareLatentsStep.check_inputs   {    &q,@"AQ"F;<Lq<P;QQYZ`YabccQ*>!?1!D:;Ka;O:PPXY^X_`aa "Er   
componentsstatec                    U R                  U5      nU R                  UR                  UR                  UR                  S9  UR
                  nUR                  UR                  -  nUR                  =(       d    UR                  Ul        UR                  =(       d    UR                  Ul        S[        UR                  5      UR                  S-  -  -  nS[        UR                  5      UR                  S-  -  -  nXQR                  SXg4n[        UR                  [        5      (       a>  [        UR                  5      U:w  a%  [!        S[        UR                  5       SU S35      eUR"                  cM  [%        XR                  XCR&                  S9Ul        UR(                  R+                  UR"                  5      Ul        U R-                  X#5        X4$ Nrk   r	   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)rV   r!   rX   )get_block_staterm   rS   rT   rl   _execution_devicerW   rU   default_heightdefault_widthr6   num_channels_latents
isinstancerV   listr-   r%   rR   r   rX   rJ   pack_latentsset_block_state	rE   rp   rq   block_stater!   rW   latent_heightlatent_widthshapes	            r   __call__$QwenImagePrepareLatentsStep.__call__   s   **51%%##'88 	 	
 -- ++k.O.OO
 )//L:3L3L'--I1I1I S!3!349T9TWX9XYZC 1 12z7R7RUV7VWX<<a]k++T22s;;P;P7QU_7_A#kF[F[B\A] ^&<'gi  &".!6!6vM^M^#K #-"6"6"C"CKDWDW"XKU0  r   r   N__name__
__module____qualname____firstlineno____doc__
model_namepropertystrrF   rz   r   rN   r   r[   r   re   staticmethodrm   rc   no_gradr   r   r   __static_attributes__r   r   r   r?   r?   y   s    @ JIS I I 
T-%8 
 

 	
Z( 	
 	
 	
d;&7 	
 	
 b b ]]_"!#; "!M "!Vc "! "!r   r?   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\S\4S j5       rSrg)"QwenImageLayeredPrepareLatentsStep   a.  
Prepare initial random noise (B, layers+1, C, H, W) for the generation process

  Components:
      pachifier (`QwenImageLayeredPachifier`)

  Inputs:
      latents (`Tensor`, *optional*):
          Pre-generated noisy latents for image generation.
      height (`int`, *optional*):
          The height in pixels of the generated image.
      width (`int`, *optional*):
          The width in pixels of the generated image.
      layers (`int`, *optional*, defaults to 4):
          Number of layers to extract from the image
      num_images_per_prompt (`int`, *optional*, defaults to 1):
          The number of images to generate per prompt.
      generator (`Generator`, *optional*):
          Torch generator for deterministic generation.
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      dtype (`dtype`, *optional*, defaults to torch.float32):
          The dtype of the model inputs, can be generated in input step.

  Outputs:
      height (`int`):
          if not set, updated to default value
      width (`int`):
          if not set, updated to default value
      latents (`Tensor`):
          The initial latents to use for the denoising process
qwenimage-layeredrB   c                     g)NzNPrepare initial random noise (B, layers+1, C, H, W) for the generation processr   rD   s    r   rF   .QwenImageLayeredPrepareLatentsStep.description  s    _r   c                 "    [        S[        SS9/$ rI   )r   r   rD   s    r   rN   6QwenImageLayeredPrepareLatentsStep.expected_components  s     +'@Zgh
 	
r   c           
      V   [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )	NrR   rS   rT   layersrU   rV   rW   rX   rY   rD   s    r   r[   )QwenImageLayeredPrepareLatentsStep.inputs  s     	*)() 78,-(	
 		
r   c                 n    [        S[        SS9[        S[        SS9[        S[        R                  SS9/$ r^   rb   rD   s    r   re   7QwenImageLayeredPrepareLatentsStep.intermediate_outputs)  rg   r   c                     U b  XS-  -  S:w  a  [        SUS-   SU  35      eUb   XS-  -  S:w  a  [        SUS-   SU 35      eg g ri   rj   rk   s      r   rm   /QwenImageLayeredPrepareLatentsStep.check_inputs5  ro   r   rp   rq   c                    U R                  U5      nU R                  UR                  UR                  UR                  S9  UR
                  nUR                  UR                  -  nUR                  =(       d    UR                  Ul        UR                  =(       d    UR                  Ul        S[        UR                  5      UR                  S-  -  -  nS[        UR                  5      UR                  S-  -  -  nXSR                  S-   UR                  Xg4n[        UR                  [        5      (       a>  [!        UR                  5      U:w  a%  [#        S[!        UR                  5       SU S35      eUR$                  cM  ['        XR                  XCR(                  S9Ul        UR*                  R-                  UR$                  5      Ul        U R/                  X#5        X4$ rs   )rt   rm   rS   rT   rl   ru   rW   rU   rv   rw   r6   r   rx   ry   rV   rz   r-   r%   rR   r   rX   rJ   r{   r|   r}   s	            r   r   +QwenImageLayeredPrepareLatentsStep.__call__=  s   **51%%##'88 	 	
 -- ++k.O.OO
 )//L:3L3L'--I1I1I S!3!349T9TWX9XYZC 1 12z7R7RUV7VWX//!3Z5T5TVcrk++T22s;;P;P7QU_7_A#kF[F[B\A] ^&<'gi  &".!6!6vM^M^#K #-"6"6"C"CKDWDW"XKU0  r   r   Nr   r   r   r   r   r      s     D %J`S ` ` 
T-%8 
 

 

Z( 

 

 	
d;&7 	
 	
 b b ]]_"!#; "!M "!Vc "! "!r   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\S	 5       r\R&                  " 5       S
\S\S\4S j5       rSrg)'QwenImagePrepareLatentsWithStrengthStepid  a  
Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps,
prepare_latents. Both noise and image latents should alreadybe patchified.

  Components:
      scheduler (`FlowMatchEulerDiscreteScheduler`)

  Inputs:
      latents (`Tensor`):
          The initial random noised, can be generated in prepare latent step.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
          generated from vae encoder and updated in input step.)
      timesteps (`Tensor`):
          The timesteps to use for the denoising process. Can be generated in set_timesteps step.

  Outputs:
      initial_noise (`Tensor`):
          The initial random noised used for inpainting denoising.
      latents (`Tensor`):
          The scaled noisy latents to use for inpainting/image-to-image denoising.
rA   rB   c                     g)NzStep that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps, prepare_latents. Both noise and image latents should alreadybe patchified.r   rD   s    r   rF   3QwenImagePrepareLatentsWithStrengthStep.description~  s     Dr   c                 $    [        S[        5      /$ Nr.   r   r   rD   s    r   rN   ;QwenImagePrepareLatentsWithStrengthStep.expected_components       +'FG
 	
r   c           	          [        SS[        R                  SS9[         R                  " SSS9[        SS[        R                  S	S9/$ )
NrR   TzCThe initial random noised, can be generated in prepare latent step.r`   requiredra   rF   image_latentsz<Can be generated from vae encoder and updated in input step.)noter"   WThe timesteps to use for the denoising process. Can be generated in set_timesteps step.r   rc   rd   rZ   rD   s    r   r[   .QwenImagePrepareLatentsWithStrengthStep.inputs  sQ     ,,a	 6tu ,,u	
 	
r   c                 f    [        S[        R                  SS9[        S[        R                  SS9/$ )Ninitial_noisez8The initial random noised used for inpainting denoising.r_   rR   zHThe scaled noisy latents to use for inpainting/image-to-image denoising.r   rc   rd   rD   s    r   re   <QwenImagePrepareLatentsWithStrengthStep.intermediate_outputs  s:     $,,V
 ,,f
 	
r   c                     U R                   S   UR                   S   :w  a+  [        SU R                   S    SUR                   S    35      eU R                  S:w  a  [        SU R                   35      eg )Nr   zE`image_latents` must have have same batch size as `latents`, but got z and r   z=`image_latents` must have 3 dimensions (patchified), but got )r   r%   ndimr   rR   s     r   rm   4QwenImagePrepareLatentsWithStrengthStep.check_inputs  s    q!W]]1%55WXeXkXklmXnWootu|  vC  vC  DE  vF  uG  H  "\]j]o]o\pqrr #r   rp   rq   c                    U R                  U5      nU R                  UR                  UR                  S9  UR                  S S R                  UR                  R                  S   5      nUR                  Ul        UR                  R                  UR                  XCR                  5      Ul        U R                  X#5        X4$ )Nr   r   r   )rt   rm   r   rR   r"   repeatr   r   r.   scale_noiser|   )rE   rp   rq   r~   latent_timesteps        r   r   0QwenImagePrepareLatentsWithStrengthStep.__call__  s    **51%33'' 	 	
 &//3::;;N;N;T;TUV;WX %0$7$7! )22>>%%8K8K
 	U0  r   r   Nr   r   r   r   r   r   d  s    . JDS D D 
T-%8 
 

 
Z( 
 
" 
d;&7 
 
 s s ]]_!#; !M !Vc ! !r   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S
\S\4S j5       rSrg)QwenImageCreateMaskLatentsStepi  a  
Step that creates mask latents from preprocessed mask_image by interpolating to latent space.

  Components:
      pachifier (`QwenImagePachifier`)

  Inputs:
      processed_mask_image (`Tensor`):
          The processed mask to use for the inpainting process.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      dtype (`dtype`, *optional*, defaults to torch.float32):
          The dtype of the model inputs, can be generated in input step.

  Outputs:
      mask (`Tensor`):
          The mask to use for the inpainting process.
rA   rB   c                     g)Nz]Step that creates mask latents from preprocessed mask_image by interpolating to latent space.r   rD   s    r   rF   *QwenImageCreateMaskLatentsStep.description  s    nr   c                 "    [        S[        SS9/$ rI   rM   rD   s    r   rN   2QwenImageCreateMaskLatentsStep.expected_components  rP   r   c                     [        SS[        R                  SS9[         R                  " SSS9[         R                  " SSS9[         R                  " S5      /$ )	Nprocessed_mask_imageTz5The processed mask to use for the inpainting process.r   rS   r   rT   rX   r   rD   s    r   r[   %QwenImageCreateMaskLatentsStep.inputs  sV     +,,S	 48$7(

 
	
r   c                 6    [        S[        R                  SS9/$ )Nmaskz+The mask to use for the inpainting process.r_   r   rD   s    r   re   3QwenImageCreateMaskLatentsStep.intermediate_outputs  s"     u||An
 	
r   rp   rq   c                    U R                  U5      nUR                  nS[        UR                  5      UR                  S-  -  -  nS[        UR
                  5      UR                  S-  -  -  n[        R                  R                  R                  UR                  XV4S9Ul        UR                  R                  S5      Ul        UR                  R                  SUR                  SSS5      Ul        UR                  R                  XCR                   S9Ul        UR"                  R%                  UR                  5      Ul        U R'                  X#5        X4$ )Nr	   )sizer   r!   rX   )rt   ru   r6   rS   rl   rT   rc   nn
functionalinterpolater   r   	unsqueezer   rx   torX   rJ   r{   r|   )rE   rp   rq   r~   r!   height_latentswidth_latentss          r   r   'QwenImageCreateMaskLatentsStep.__call__  s9   **51--
 c+"4"45*:U:UXY:YZ[S!2!23
8S8SVW8WXY 88..::,, 0 ; 

 '++55a8&++221j6U6UWXZ[]^_&++..fDUDU.V%//<<[=M=MNU0  r   r   N)r   r   r   r   r   r   r   r   rF   rz   r   rN   r   r[   r   re   rc   r   r   r   r   r   r   r   r   r   r     s    * JoS o o 
T-%8 
 

 
Z( 
 
 
d;&7 
 
 ]]_!#; !M !Vc ! !r   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       rS	\S
\S\4S jrSrg)QwenImageSetTimestepsStepi'  aj  
Step that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents step.

  Components:
      scheduler (`FlowMatchEulerDiscreteScheduler`)

  Inputs:
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      latents (`Tensor`):
          The initial random noised latents for the denoising process. Can be generated in prepare latents step.

  Outputs:
      timesteps (`Tensor`):
          The timesteps to use for the denoising process
rA   rB   c                     g)NzpStep that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents step.r   rD   s    r   rF   %QwenImageSetTimestepsStep.description=  s     Br   c                 $    [        S[        5      /$ r   r   rD   s    r   rN   -QwenImageSetTimestepsStep.expected_componentsA  r   r   c           	          [         R                  " S5      [         R                  " S5      [        SS[        R                  SS9/$ )Nr    r#   rR   TzfThe initial random noised latents for the denoising process. Can be generated in prepare latents step.r   r   rZ   rc   rd   rD   s    r   r[    QwenImageSetTimestepsStep.inputsG  sF      56),, E		
 		
r   c                 6    [        S[        R                  SS9/$ )Nr"   z.The timesteps to use for the denoising processr_   r   rD   s    r   re   .QwenImageSetTimestepsStep.intermediate_outputsT  s"      ELLFv
 	
r   rp   rq   c           
         U R                  U5      nUR                  nUR                  c/  [        R                  " SSUR
                  -  UR
                  5      OUR                  n[        UR                  R                  S   UR                  R                  R                  SS5      UR                  R                  R                  SS5      UR                  R                  R                  SS5      UR                  R                  R                  S	S
5      S9n[        UR                  UR
                  UUUS9u  Ul        Ul        UR                  R                  S5        U R                  X#5        X4$ )N      ?r   base_image_seq_len   max_image_seq_len   r         ?r   ffffff?r   r   r   r   r   r.   r    r!   r#   r   r   )rt   ru   r#   nplinspacer    r   rR   r   r.   configgetr2   r"   r4   r|   rE   rp   rq   r~   r!   r#   r   s          r   r   "QwenImageSetTimestepsStep.__call__\  sP   **51-- !!) KKQ!@!@@+BaBab## 	 %--33A6#--44889MsS",,33778KTR!++2266|SI **1155k4H
 BT ** + ? ?B
>{> 	,,Q/U0  r   r   Nr   r   r   r   r   r   r   r   rF   rz   r   rN   r   r[   r   re   r   r   r   r   r   r   r   r   r   '  s    & JBS B B 
T-%8 
 

 

Z( 

 

 
d;&7 
 
!#; !M !Vc !r   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       r\R"                  " 5       S	\S\4S
 j5       rSrg) QwenImageLayeredSetTimestepsStepi}  aO  
Set timesteps step for QwenImage Layered with custom mu calculation based on image_latents.

  Components:
      scheduler (`FlowMatchEulerDiscreteScheduler`)

  Inputs:
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      image_latents (`Tensor`):
          image latents used to guide the image generation. Can be generated from vae_encoder step.

  Outputs:
      timesteps (`Tensor`):
          The timesteps to use for the denoising process.
r   rB   c                     g)Nz[Set timesteps step for QwenImage Layered with custom mu calculation based on image_latents.r   rD   s    r   rF   ,QwenImageLayeredSetTimestepsStep.description  s    lr   c                 $    [        S[        5      /$ r   r   rD   s    r   rN   4QwenImageLayeredSetTimestepsStep.expected_components  r   r   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      /$ )Nr    r#   r   rY   rD   s    r   r[   'QwenImageLayeredSetTimestepsStep.inputs  s9      56)0
 	
r   c                 6    [        S[        R                  SS9/$ )Nr"   /The timesteps to use for the denoising process.r_   r   rD   s    r   re   5QwenImageLayeredSetTimestepsStep.intermediate_outputs  s"      ELLFw
 	
r   rq   c                    U R                  U5      nUR                  nSnUR                  R                  S   U-  S-  nUR                  c/  [
        R                  " SSUR                  -  UR                  5      OUR                  n[        UR                  UR                  UUUS9u  Ul
        Ul        UR                  R                  S5        U R                  X#5        X4$ )Ng      p@r   r   r   )r#   r   r   )rt   ru   r   r   r#   r   r   r    r2   r.   r"   r4   r|   )rE   rp   rq   r~   r!   base_seqlenr   r#   s           r   r   )QwenImageLayeredSetTimestepsStep.__call__  s    **51-- *''--a0;>3F
 !!) KKQ!@!@@+BaBab## 	 BT  ++B
>{> 	,,Q/U0  r   r   N)r   r   r   r   r   r   r   r   rF   rz   r   rN   r   r[   r   re   rc   r   r   r   r   r   r   r   r   r   }  s    & %JmS m m 
T-%8 
 

 
Z( 
 
 
d;&7 
 
 ]]_!- !M ! !r   r   c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\S\	\   4S j5       rS	\S
\S\4S jrSrg)%QwenImageSetTimestepsWithStrengthStepi  aS  
Step that sets the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after prepare
latents step.

  Components:
      scheduler (`FlowMatchEulerDiscreteScheduler`)

  Inputs:
      num_inference_steps (`int`, *optional*, defaults to 50):
          The number of denoising steps.
      sigmas (`list`, *optional*):
          Custom sigmas for the denoising process.
      latents (`Tensor`):
          The latents to use for the denoising process. Can be generated in prepare latents step.
      strength (`float`, *optional*, defaults to 0.9):
          Strength for img2img/inpainting.

  Outputs:
      timesteps (`Tensor`):
          The timesteps to use for the denoising process.
      num_inference_steps (`int`):
          The number of denoising steps to perform at inference time. Updated based on strength.
rA   rB   c                     g)NzStep that sets the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after prepare latents step.r   rD   s    r   rF   1QwenImageSetTimestepsWithStrengthStep.description  s     Sr   c                 $    [        S[        5      /$ r   r   rD   s    r   rN   9QwenImageSetTimestepsWithStrengthStep.expected_components  r   r   c           	          [         R                  " S5      [         R                  " S5      [        SS[        R                  SS9[         R                  " SSS	9/$ )
Nr    r#   rR   TzWThe latents to use for the denoising process. Can be generated in prepare latents step.)r   ra   rF   r:   g?)defaultr   rD   s    r   r[   ,QwenImageSetTimestepsWithStrengthStep.inputs  sU      56),,u	 
C8

 
	
r   c                 R    [        S[        R                  SS9[        S[        SS9/$ )Nr"   r   r_   r    zVThe number of denoising steps to perform at inference time. Updated based on strength.)r   rc   rd   r6   rD   s    r   re   :QwenImageSetTimestepsWithStrengthStep.intermediate_outputs   s6      ,,M
 *t
 	
r   rp   rq   c           
      $   U R                  U5      nUR                  nUR                  c/  [        R                  " SSUR
                  -  UR
                  5      OUR                  n[        UR                  R                  S   UR                  R                  R                  SS5      UR                  R                  R                  SS5      UR                  R                  R                  SS5      UR                  R                  R                  S	S
5      S9n[        UR                  UR
                  UUUS9u  Ul        Ul        [        UR                  UR
                  UR                  S9u  Ul        Ul        U R!                  X#5        X4$ )Nr   r   r   r   r   r   r   r   r   r   r   r   )r.   r    r:   )rt   ru   r#   r   r   r    r   rR   r   r.   r   r   r2   r"   r=   r:   r|   r   s          r   r   .QwenImageSetTimestepsWithStrengthStep.__call__  sq   **51-- !!) KKQ!@!@@+BaBab## 	 %--33A6#--44889MsS",,33778KTR!++2266|SI **1155k4H
 BT ** + ? ?B
>{> BO ** + ? ? ))B
>{> 	U0  r   r   Nr   r   r   r   r  r    s    0 JSS S S 
T-%8 
 

 
Z( 
 
 
d;&7 
 
!!#; !!M !!Vc !!r   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       rS\S	\S\4S
 jrSrg)QwenImageRoPEInputsStepi;  aS  
Step that prepares the RoPE inputs for the denoising process. Should be place after prepare_latents step

  Inputs:
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.

  Outputs:
      img_shapes (`list`):
          The shapes of the images latents, used for RoPE calculation
rA   rB   c                      gNzhStep that prepares the RoPE inputs for the denoising process. Should be place after prepare_latents stepr   rD   s    r   rF   #QwenImageRoPEInputsStep.descriptionS       w	
r   c                     [         R                  " S5      [         R                  " SSS9[         R                  " SSS9[         R                  " S5      [         R                  " S5      /$ )NrW   rS   Tr   rT   prompt_embeds_masknegative_prompt_embeds_maskrY   rD   s    r   r[   QwenImageRoPEInputsStep.inputsY  sZ     -48$7 45 =>
 	
r   c           
      d    [        SS[        [        [        [        [        [        4         SS9/$ N
img_shapesdenoiser_input_fieldsz;The shapes of the images latents, used for RoPE calculationr`   kwargs_typera   rF   r   rz   tupler6   rD   s    r   re   ,QwenImageRoPEInputsStep.intermediate_outputsc  7     !3tE#sC-$89:Y	
 	
r   rp   rq   c                     U R                  U5      nSUR                  UR                  -  S-  UR                  UR                  -  S-  4//UR                  -  Ul        U R                  X#5        X4$ Nr   r	   )rt   rS   rl   rT   rW   r  r|   rE   rp   rq   r~   s       r   r    QwenImageRoPEInputsStep.__call__n  s    **51
 &&**E*EEJ%%)D)DDI"
 """# 	U0  r   r   Nr   r   r   r   r   r   r   r   rF   rz   r   r[   r   re   r   r   r   r   r   r   r   r  r  ;  s    * J
S 
 

 
Z( 
 
 
d;&7 
 
!#; !M !Vc !r   r  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       rS\S	\S\4S
 jrSrg)QwenImageEditRoPEInputsStepi  a@  
Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after
prepare_latents step

  Inputs:
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      image_height (`int`):
          The height of the reference image. Can be generated in input step.
      image_width (`int`):
          The width of the reference image. Can be generated in input step.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.

  Outputs:
      img_shapes (`list`):
          The shapes of the images latents, used for RoPE calculation
rA   rB   c                     g)NzStep that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after prepare_latents stepr   rD   s    r   rF   'QwenImageEditRoPEInputsStep.description  s     Wr   c           	         [         R                  " S5      [        SS[        SS9[        SS[        SS9[         R                  " SSS	9[         R                  " S
SS	9[         R                  " S5      [         R                  " S5      /$ )NrW   image_heightTzBThe height of the reference image. Can be generated in input step.r   image_widthzAThe width of the reference image. Can be generated in input step.rS   r   rT   r  r  )r   rZ   r6   rD   s    r   r[   "QwenImageEditRoPEInputsStep.inputs  s     -#`	 "_	 48$7 45 =>#
 	
r   c           
      d    [        SS[        [        [        [        [        [        4         SS9/$ r  r  rD   s    r   re   0QwenImageEditRoPEInputsStep.intermediate_outputs  r!  r   rp   rq   c                 V   U R                  U5      nSUR                  UR                  -  S-  UR                  UR                  -  S-  4SUR                  UR                  -  S-  UR
                  UR                  -  S-  4//UR                  -  Ul        U R                  X#5        X4$ r#  )	rt   rS   rl   rT   r,  r-  rW   r  r|   r$  s       r   r   $QwenImageEditRoPEInputsStep.__call__  s    **51 &&**E*EEJ%%)D)DDI ,,
0K0KKqP++z/J/JJaO"
 """# 	U0  r   r   Nr&  r   r   r   r(  r(    s    4 JWS W W 
Z( 
 
* 
d;&7 
 
!#; !M !Vc !r   r(  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       rS\S	\S\4S
 jrSrg)QwenImageEditPlusRoPEInputsStepi  a  
Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit Plus.
  Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images. Should be placed
  after prepare_latents step.

  Inputs:
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      image_height (`list`):
          The heights of the reference images. Can be generated in input step.
      image_width (`list`):
          The widths of the reference images. Can be generated in input step.
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.

  Outputs:
      img_shapes (`list`):
          The shapes of the image latents, used for RoPE calculation
      txt_seq_lens (`list`):
          The sequence lengths of the prompt embeds, used for RoPE calculation
      negative_txt_seq_lens (`list`):
          The sequence lengths of the negative prompt embeds, used for RoPE calculation
zqwenimage-edit-plusrB   c                      g)NzStep that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit Plus.
Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images.
Should be placed after prepare_latents step.r   rD   s    r   rF   +QwenImageEditPlusRoPEInputsStep.description  s    ;	
r   c           	      ,   [         R                  " S5      [        SS[        [           SS9[        SS[        [           SS9[         R                  " SSS	9[         R                  " S
SS	9[         R                  " S5      [         R                  " S5      /$ )NrW   r,  TzDThe heights of the reference images. Can be generated in input step.r   r-  zCThe widths of the reference images. Can be generated in input step.rS   r   rT   r  r  )r   rZ   rz   r6   rD   s    r   r[   &QwenImageEditPlusRoPEInputsStep.inputs  s     -#s)b	 "s)a	 48$7 45 =>#
 	
r   c           
          [        SS[        [        [        [        [        [        4         SS9[        SS[        [           SS9[        SS[        [           SS9/$ )	Nr  r  :The shapes of the image latents, used for RoPE calculationr  txt_seq_lensDThe sequence lengths of the prompt embeds, used for RoPE calculationnegative_txt_seq_lensMThe sequence lengths of the negative prompt embeds, used for RoPE calculationr  rD   s    r   re   4QwenImageEditPlusRoPEInputsStep.intermediate_outputs  sk     !3tE#sC-$89:X	 #3s)b	 ,3s)k	
 	
r   rp   rq   c           	      R   U R                  U5      nUR                  nSUR                  U-  S-  UR                  U-  S-  4/[	        UR
                  UR                  5       VVs/ s H  u  pVSXT-  S-  Xd-  S-  4PM     snnQ/UR                  -  Ul        UR                  b'  UR                  R                  SS9R                  5       OS Ul        UR                  b'  UR                  R                  SS9R                  5       OS Ul        U R                  X#5        X4$ s  snnf )Nr   r	   dim)rt   rl   rS   rT   zipr,  r-  rW   r  r  sumtolistr;  r  r=  r|   )rE   rp   rq   r~   rl   
img_height	img_widths          r   r   (QwenImageEditPlusRoPEInputsStep.__call__3  sO   **51%66
 K&&*::a?ARARVfAfjkAkl 25[5M5M{OfOf1g1g-
 
6!;Y=Z^_=_`1g"
 """# CNB`B`BlK**..1.5<<>rv 	 
 66B 3377A7>EEG 	) 	U0  %s   &D#r   Nr&  r   r   r   r4  r4    s    > 'J
S 
 
 
Z( 
 
* 
d;&7 
 
,!#; !M !Vc !r   r4  c                       \ rS rSrSrSr\S\4S j5       r\S\	\
   4S j5       r\S\	\   4S j5       r\R                  " 5       S\S\4S	 j5       rS
rg)QwenImageLayeredRoPEInputsStepiR  a  
Step that prepares the RoPE inputs for the denoising process. Should be place after prepare_latents step

  Inputs:
      batch_size (`int`, *optional*, defaults to 1):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
          be generated in input step.
      layers (`int`, *optional*, defaults to 4):
          Number of layers to extract from the image
      height (`int`):
          The height in pixels of the generated image.
      width (`int`):
          The width in pixels of the generated image.
      prompt_embeds_mask (`Tensor`):
          mask for the text embeddings. Can be generated from text_encoder step.
      negative_prompt_embeds_mask (`Tensor`, *optional*):
          mask for the negative text embeddings. Can be generated from text_encoder step.

  Outputs:
      img_shapes (`list`):
          The shapes of the image latents, used for RoPE calculation
      txt_seq_lens (`list`):
          The sequence lengths of the prompt embeds, used for RoPE calculation
      negative_txt_seq_lens (`list`):
          The sequence lengths of the negative prompt embeds, used for RoPE calculation
      additional_t_cond (`Tensor`):
          The additional t cond, used for RoPE calculation
r   rB   c                      gr  r   rD   s    r   rF   *QwenImageLayeredRoPEInputsStep.descriptionr  r  r   c                     [         R                  " S5      [         R                  " S5      [         R                  " SSS9[         R                  " SSS9[         R                  " S5      [         R                  " S5      /$ )	NrW   r   rS   Tr   rT   r  r  rY   rD   s    r   r[   %QwenImageLayeredRoPEInputsStep.inputsx  sh     -)48$7 45 =>
 	
r   c           
          [        S[        [        [        [        [        [        4         SSS9[        S[        [           SSS9[        S[        [           SSS9[        S	[        R
                  SS
S9/$ )Nr  r  r:  )r`   ra   r  rF   r;  r<  r=  r>  additional_t_condz0The additional t cond, used for RoPE calculation)r   rz   r  r6   rc   rd   rD   s    r   re   3QwenImageLayeredRoPEInputsStep.intermediate_outputs  s     !tE#sC-$89:3X	 #s)3b	 ,s)3k	 (,,3N	'
 	
r   rq   c                    U R                  U5      nUR                  nSUR                  UR                  -  S-  UR                  UR                  -  S-  4nU/UR
                  S-   -  /UR                  -  Ul        UR                  b'  UR                  R                  SS9R                  5       OS Ul        UR                  b'  UR                  R                  SS9R                  5       OS Ul        [        R                  " S/UR                  -  5      R!                  U[        R"                  S9Ul        U R'                  X#5        X4$ )Nr   r	   rA  r   r   )rt   ru   rS   rl   rT   r   rW   r  r  rD  rE  r;  r  r=  rc   tensorr   longrP  r|   )rE   rp   rq   r~   r!   r   s         r   r   'QwenImageLayeredRoPEInputsStep.__call__  sI   **51-- *"="==B!<!<<A
 $)'[-?-?!-C"D!EH^H^!^ CNB`B`BlK**..1.5<<>rv 	 
 66B 3377A7>EEG 	) ).aS;;Q;Q5Q(R(U(U]ckpkuku(U(v%U0  r   r   N)r   r   r   r   r   r   r   r   rF   rz   r   r[   r   re   rc   r   r   r   r   r   r   r   rJ  rJ  R  s    : %J
S 
 

 
Z( 
 
 
d;&7 
 
8 ]]_!- !M ! !r   rJ  c                       \ rS rSrSrSr\S\\   4S j5       r	\S\
4S j5       r\S\\   4S j5       r\S\\   4S j5       r\R"                  " 5       S	\S
\S\4S j5       rSrg)%QwenImageControlNetBeforeDenoiserStepi  a~  
step that prepare inputs for controlnet. Insert before the Denoise Step, after set_timesteps step.

  Components:
      controlnet (`QwenImageControlNetModel`)

  Inputs:
      control_guidance_start (`float`, *optional*, defaults to 0.0):
          When to start applying ControlNet.
      control_guidance_end (`float`, *optional*, defaults to 1.0):
          When to stop applying ControlNet.
      controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
          Scale for ControlNet conditioning.
      control_image_latents (`Tensor`):
          The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
          step.
      timesteps (`Tensor`):
          The timesteps to use for the denoising process. Can be generated in set_timesteps step.

  Outputs:
      controlnet_keep (`list`):
          The controlnet keep values
rA   rB   c                 $    [        S[        5      /$ )N
controlnet)r   r   rD   s    r   rN   9QwenImageControlNetBeforeDenoiserStep.expected_components  s     ,(@A
 	
r   c                     g)Nzbstep that prepare inputs for controlnet. Insert before the Denoise Step, after set_timesteps step.r   rD   s    r   rF   1QwenImageControlNetBeforeDenoiserStep.description  s    sr   c                     [         R                  " S5      [         R                  " S5      [         R                  " S5      [        SS[        R                  SS9[        SS[        R                  S	S9/$ )
Ncontrol_guidance_startcontrol_guidance_endcontrolnet_conditioning_scalecontrol_image_latentsTzlThe control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.r   r"   r   r   rD   s    r   r[   ,QwenImageControlNetBeforeDenoiserStep.inputs  sp      89 67 ?@,,, K	  ,,u	
 	
r   c                 0    [        S[        [           SS9/$ )Ncontrolnet_keepzThe controlnet keep values)ra   rF   )r   rz   floatrD   s    r   re   :QwenImageControlNetBeforeDenoiserStep.intermediate_outputs  s      )T%[Njk
 	
r   rp   rq   c                    U R                  U5      n[        UR                  5      n[        UR                  [
        5      (       dH  [        UR                  [
        5      (       a)  [        UR                  5      UR                  /-  Ul        O[        UR                  [
        5      (       dH  [        UR                  [
        5      (       a)  [        UR                  5      UR                  /-  Ul        O[        UR                  [
        5      (       dt  [        UR                  [
        5      (       dU  [        U[        5      (       a  [        UR                  5      OSnXSR                  /-  XSR                  /-  sUl        Ul        [        U[        5      (       a4  [        UR                  [        5      (       a  UR                  /W-  Ul
        / Ul        [        [        UR                  5      5       H  n[        UR                  UR                  5       VVs/ s HQ  u  pxS[        U[        UR                  5      -  U:  =(       d    US-   [        UR                  5      -  U:  5      -
  PMS     n	nnUR                  R!                  [        U["        5      (       a  U	S   OU	5        M     U R%                  X#5        X4$ s  snnf )Nr   r   r   )rt   r   rY  ry   r^  rz   r_  r-   r   ra  r`  re  rd  ranger"   rC  appendr   r|   )
rE   rp   rq   r~   rY  multisekeepss
             r   r   .QwenImageControlNetBeforeDenoiserStep.__call__  sy   **51":#8#89
 +<<dCC
,,dI
 I
 25[5U5U1V22Z 2K. K<<dCC
..I
 I
 03;3U3U/V00Z 0K, K>>EEj,,dO
 O
 ;EZQn:o:oK556uv  ::;;8899 QK.0P j"?@@Z55uF
 F
 :E9b9b8cfj8jK5 ')#s;0012A   B BKDdDdeeDA eAK$9$9 ::Q>j1q5CP[PePeLfBfijBjkke   ''..:jRj;k;kuQxqvw 3 	U0  s   AKr   N)r   r   r   r   r   r   r   rz   r   rN   r   rF   r   r[   r   re   rc   r   r   r   r   r   r   r   r   rW  rW    s    0 J
T-%8 
 

 tS t t 
Z( 
 
& 
d;&7 
 

 ]]_.!#; .!M .!Vc .! .!r   rW  )r   r   r   r   )NNNN)*r'   numpyr   rc   modelsr   r   
schedulersr   utils.torch_utilsr   r   modular_pipeliner
   r   modular_pipeline_utilsr   r   r   r   r   r   r6   re  r   r   r!   rz   r2   r=   r?   r   r   r   r   r   r  r  r(  r4  rJ  rW  r   r   r   <module>rv     s      M 9 < C K K e e 

 
 	

 
  '+(,"&!%8*t8* %,,%8* Cy4	8*
 K$8*x	4$p!"7 p!hs!)> s!ne!.C e!RQ!%: Q!tR! 5 R!lL!'< L!`c!,A c!\B!3 B!LW!"7 W!vr!&; r!lk!%: k!dn!,A n!r   