
    
3j              
          S SK r S SKJrJr  S SKrS SKrS SKJrJ	r	J
r
Jr  SSKJrJr  SSKJrJrJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJrJrJrJr  SSK J!r!  SSK"J#r#  SSK$J%r%  \" 5       (       a  S SK&J's  J(r)  Sr*OSr*\RV                  " \,5      r-Sr.    S#S\/S\/S\0S\04S jjr1 S$S\Rd                  S\Rf                  S-  S\44S jjr5    S%S\/S-  S\4\Rl                  -  S-  S\7\/   S-  S\7\0   S-  4S  jjr8 " S! S"\#\\5      r9g)&    N)AnyCallable)CLIPTextModelCLIPTokenizerT5EncoderModelT5TokenizerFast   )PipelineImageInputVaeImageProcessor)FluxLoraLoaderMixinFromSingleFileMixinTextualInversionLoaderMixin)AutoencoderKL)FluxTransformer2DModel)FlowMatchEulerDiscreteScheduler)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor   )DiffusionPipeline   )FluxPipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from controlnet_aux import CannyDetector
        >>> from diffusers import FluxControlImg2ImgPipeline
        >>> from diffusers.utils import load_image

        >>> pipe = FluxControlImg2ImgPipeline.from_pretrained(
        ...     "black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16
        ... ).to("cuda")

        >>> prompt = "A robot made of exotic candies and chocolates of different kinds. Abstract background"
        >>> image = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/watercolor-painting.jpg"
        ... )
        >>> control_image = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png"
        ... )

        >>> processor = CannyDetector()
        >>> control_image = processor(
        ...     control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024
        ... )

        >>> image = pipe(
        ...     prompt=prompt,
        ...     image=image,
        ...     control_image=control_image,
        ...     strength=0.8,
        ...     height=1024,
        ...     width=1024,
        ...     num_inference_steps=50,
        ...     guidance_scale=30.0,
        ... ).images[0]
        >>> image.save("output.png")
        ```
base_seq_lenmax_seq_len
base_shift	max_shiftc                 4    XC-
  X!-
  -  nX5U-  -
  nX-  U-   nU$ N )image_seq_lenr   r   r   r    mbmus           p/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/flux/pipeline_flux_control_img2img.pycalculate_shiftr)   [   s3     
	K$>?A%%A		Q	BI    encoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr/   r0   moder2   AttributeError)r+   r,   r-   s      r(   retrieve_latentsr6   i   s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSSr*   num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr9   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r9   r8   r:   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r:   r8   r8   r#   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r9   len)	schedulerr7   r8   r9   r:   kwargsaccepts_timestepsaccept_sigmass           r(   retrieve_timestepsrI   w   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r*   c            -       V  ^  \ rS rSrSrSr/ rSS/rS\S\	S\
S	\S
\S\S\4U 4S jjr     S=S\\\   -  S\S\S\R(                  S-  S\R*                  S-  4
S jjr  S>S\\\   -  S\S\R(                  S-  4S jjr       S?S\\\   -  S\\\   -  S-  S\R(                  S-  S\S\R0                  S-  S\R0                  S-  S\S\S-  4S jjrS\R6                  S\R8                  4S jrS r    S@S  jr\ S! 5       r!\ S" 5       r"\ S# 5       r# SAS$ jr$  SBS% jr%\&S& 5       r'\&S' 5       r(\&S( 5       r)\&S) 5       r*\RV                  " 5       \," \-5      SSSSSSS*S+SS,SSSSSS-S.SSS/S4S\\\   -  S\\\   -  S-  S\.S/\.S0\S-  S1\S-  S2\S3\S4\\   S-  S5\S\S-  S\R8                  \\R8                     -  S-  S\R0                  S-  S\R0                  S-  S\R0                  S-  S6\S-  S7\/S8\0\\14   S-  S9\2\\/S4   S-  S:\\   S\4*S; jj5       5       r3S<r4U =r5$ )CFluxControlImg2ImgPipeline   as  
The Flux pipeline for image inpainting.

Reference: https://blackforestlabs.ai/announcing-black-forest-labs/

Args:
    transformer ([`FluxTransformer2DModel`]):
        Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModel`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
        the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
    text_encoder_2 ([`T5EncoderModel`]):
        [T5](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5EncoderModel), specifically
        the [google/t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`T5TokenizerFast`):
        Second Tokenizer of class
        [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast).
z.text_encoder->text_encoder_2->transformer->vaer2   prompt_embedsrE   vaetext_encoder	tokenizertext_encoder_2tokenizer_2transformerc           
        > [         TU ]  5         U R                  UUUUUUUS9  [        U SS 5      (       a/  S[	        U R
                  R                  R                  5      S-
  -  OSU l        [        U R                  S-  S9U l
        [        U S5      (       a#  U R                  b  U R                  R                  OSU l        S	U l        g )
N)rN   rO   rQ   rP   rR   rS   rE   rN   r   r      )vae_scale_factorrP   M      )super__init__register_modulesgetattrrD   rN   configblock_out_channelsrV   r   image_processorr3   rP   model_max_lengthtokenizer_max_lengthdefault_sample_size)	selfrE   rN   rO   rP   rQ   rR   rS   rC   s	           r(   rZ   #FluxControlImg2ImgPipeline.__init__   s     	%)## 	 	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw  1$BWBWZ[B[\/6t[/I/IdnnNhDNN++np 	! $' r*   Nr      promptnum_images_per_promptmax_sequence_lengthr8   dtypec           
         U=(       d    U R                   nU=(       d    U R                  R                  n[        U[        5      (       a  U/OUn[        U5      n[        U [        5      (       a  U R                  XR                  5      nU R                  USUSSSSS9nUR                  nU R                  USSS9R                  n	U	R                  S   UR                  S   :  ag  [        R                  " X5      (       dL  U R                  R                  U	S S 2U R                  S	-
  S24   5      n
[        R!                  S
U SU
 35        U R#                  UR%                  U5      SS9S   nU R"                  R                  nUR%                  XTS9nUR                  u  pnUR'                  S	US	5      nUR)                  Xb-  US5      nU$ )N
max_lengthTFpt)paddingrk   
truncationreturn_lengthreturn_overflowing_tokensreturn_tensorslongestrm   rq   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: output_hidden_statesr   ri   r8   )_execution_devicerO   ri   
isinstancestrrD   r   maybe_convert_promptrR   	input_idsshapetorchequalbatch_decodera   loggerwarningrQ   torepeatview)rc   rf   rg   rh   r8   ri   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textrM   _seq_lens                 r(   _get_t5_prompt_embeds0FluxControlImg2ImgPipeline._get_t5_prompt_embeds   s    14110**00'44&&[
d788..v7G7GHF&& *&+ ' 
 %..**69UY*Zdd  $(<(<R(@@UcIuIu++88DLeLehiLilnLnIn9opLNN'(	,A
 ++N,=,=f,E\a+bcde##))%((u(D%++A &,,Q0EqI%**:+MwXZ[r*   c           
      r   U=(       d    U R                   n[        U[        5      (       a  U/OUn[        U5      n[        U [        5      (       a  U R                  XR                  5      nU R                  USU R                  SSSSS9nUR                  nU R                  USSS9R                  nUR                  S   UR                  S   :  aq  [        R                  " Xg5      (       dV  U R                  R                  US S 2U R                  S	-
  S24   5      n[        R                  S
U R                   SU 35        U R                  UR!                  U5      SS9n	U	R"                  n	U	R!                  U R                  R$                  US9n	U	R'                  S	U5      n	U	R)                  XB-  S5      n	U	$ )Nrk   TFrl   )rm   rk   rn   rp   ro   rq   rr   rs   rt   r   z\The following part of your input was truncated because CLIP can only handle sequences up to ru   rv   rx   )ry   rz   r{   rD   r   r|   rP   ra   r}   r~   r   r   r   r   r   rO   r   pooler_outputri   r   r   )
rc   rf   rg   r8   r   r   r   r   r   rM   s
             r(   _get_clip_prompt_embeds2FluxControlImg2ImgPipeline._get_clip_prompt_embeds"  s    1411'44&&[
d788..v~~FFnn 00&+ % 
 %....SW.Xbb  $(<(<R(@@UcIuIu>>66q$JcJcfgJgjlJlGl7mnLNN--.i~G )).*;*;F*CZ_)` &33%((t/@/@/F/Fv(V &,,Q0EF%**:+MrRr*   prompt_2pooled_prompt_embeds
lora_scalec	                    U=(       d    U R                   nUbw  [        U [        5      (       ab  Xl        U R                  b!  [
        (       a  [        U R                  U5        U R                  b!  [
        (       a  [        U R                  U5        [        U[        5      (       a  U/OUnUcH  U=(       d    Un[        U[        5      (       a  U/OUnU R                  UUUS9nU R                  UUUUS9nU R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [        U R                  U5        U R                  b  U R                  R                  OU R                  R                  n	[        R                  " UR                   S   S5      R#                  X9S9n
XVU
4$ )a?  

Args:
    prompt (`str` or `list[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in all text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
)rf   r8   rg   )rf   rg   rh   r8   r   r	   r8   ri   )ry   rz   r   _lora_scalerO   r   r   rQ   r{   r   r   r   ri   rS   r   zerosr~   r   )rc   rf   r   r8   rg   rM   r   rh   r   ri   text_idss              r(   encode_prompt(FluxControlImg2ImgPipeline.encode_promptO  s   @ 1411 !j7J&K&K)   ,1A1A!$"3"3Z@"".3C3C!$"5"5zB'44&& )6H%/#%>%>zHH $(#?#?&; $@ $ 
 !66&;$7	 7 M ($ 3449I9I#D$5$5zB*$ 3449I9I#D$7$7D+/+<+<+H!!''dN^N^NdNd;;}2215q9<<F<XH<<r*   imager,   c                    [        U[        5      (       af  [        UR                  S   5       Vs/ s H-  n[	        U R
                  R                  XUS-    5      X#   S9PM/     nn[        R                  " USS9nO#[	        U R
                  R                  U5      US9nX@R
                  R                  R                  -
  U R
                  R                  R                  -  nU$ s  snf )Nr   r   r,   dim)rz   listranger~   r6   rN   encoder   catr]   shift_factorscaling_factor)rc   r   r,   iimage_latentss        r(   _encode_vae_image,FluxControlImg2ImgPipeline._encode_vae_image  s    i&& u{{1~..A !1q51A!Bil[.   "IIm;M,TXX__U-CyYM&)E)EEIgIggs   4C$c                 N   [        X-  U5      n[        [        X-
  S5      5      nU R                  R                  XPR                  R
                  -  S  n[        U R                  S5      (       a1  U R                  R                  XPR                  R
                  -  5        XaU-
  4$ )Nr   set_begin_index)minintmaxrE   r9   orderr3   r   )rc   r7   strengthr8   init_timestept_startr9   s          r(   get_timesteps(FluxControlImg2ImgPipeline.get_timesteps  s    /:<OPc-=qABNN,,W~~7K7K-K-MN	4>>#455NN**7^^5I5I+IJ777r*   c
           
        ^  US:  d  US:  a  [        SU 35      eUT R                  S-  -  S:w  d  UT R                  S-  -  S:w  a,  [        R                  ST R                  S-   SU SU S35        UbW  [	        U 4S	 jU 5       5      (       d=  [        S
T R
                   SU V
s/ s H  oT R
                  ;  d  M  U
PM     sn
 35      eUb  Ub  [        SU SU S35      eUb  Ub  [        SU SU S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  Uc  [        S5      eU	b  U	S:  a  [        SU	 35      eg g s  sn
f )Nr   r   z2The value of strength should in [0.0, 1.0] but is r   z-`height` and `width` have to be divisible by z	 but are z and z(. Dimensions will be resized accordinglyc              3   @   >#    U  H  oTR                   ;   v   M     g 7fr"   )_callback_tensor_inputs).0krc   s     r(   	<genexpr>:FluxControlImg2ImgPipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.re   z8`max_sequence_length` cannot be greater than 512 but is )
r<   rV   r   r   allr   rz   r{   r   type)rc   rf   r   r   heightwidthrM   r   "callback_on_step_end_tensor_inputsrh   r   s   `          r(   check_inputs'FluxControlImg2ImgPipeline.check_inputs  s    a<8a<QRZQ[\]]T**Q./14AVAVYZAZ8[_`8`NN?@U@UXY@Y?ZZcdjckkpqvpw  x`  a .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee$)=)E U  */BS/HWXkWlmnn 0I*7 pHs   (G?Gc                 ,   [         R                  " XS5      nUS   [         R                  " U5      S S 2S 4   -   US'   US   [         R                  " U5      S S S 24   -   US'   UR                  u  pgnUR	                  Xg-  U5      nUR                  X4S9$ )Nr	   ).r   ).r   r   )r   r   aranger~   reshaper   )	r   r   r   r8   ri   latent_image_idslatent_image_id_heightlatent_image_id_widthlatent_image_id_channelss	            r(   _prepare_latent_image_ids4FluxControlImg2ImgPipeline._prepare_latent_image_ids  s     !;;va8#3F#;ell6>RSTVZSZ>[#[ #3F#;ell5>QRVXYRY>Z#Z RbRhRhO7O+33":<T
  ""&">>r*   c                     U R                  XUS-  SUS-  S5      n U R                  SSSSSS5      n U R                  XS-  US-  -  US-  5      n U $ )Nr   r      r   r	      )r   permuter   )r2   r   num_channels_latentsr   r   s        r(   _pack_latents(FluxControlImg2ImgPipeline._pack_latents  sg     ,,z1aQVZ[Q[]^_//!Q1a3//*{uz.JL`cdLder*   c                    U R                   u  pEnS[        U5      US-  -  -  nS[        U5      US-  -  -  nU R                  XAS-  US-  US-  SS5      n U R                  SSSSSS5      n U R	                  XFS-  X5      n U $ )Nr   r   r   r	   r   r   )r~   r   r   r   r   )r2   r   r   rV   r   num_patcheschannelss          r(   _unpack_latents*FluxControlImg2ImgPipeline._unpack_latents  s     -4MM)
 c&k&6&:;<SZ$4q$89:,,zQ;
HPQMSTVWX//!Q1a3//*5.A6Qr*   c                 j   [        U	[        5      (       a*  [        U	5      U:w  a  [        S[        U	5       SU S35      eS[	        U5      U R
                  S-  -  -  nS[	        U5      U R
                  S-  -  -  nX4XV4nU R                  X5S-  US-  X5      nU
b  U
R                  XS9U4$ UR                  XS9nU R                  XS9nX=R                  S   :  a@  X=R                  S   -  S:X  a+  X=R                  S   -  n[        R                  " U/U-  SS9nO\X=R                  S   :  a4  X=R                  S   -  S:w  a  [        S	UR                  S    S
U S35      e[        R                  " U/SS9n[        XXS9nU R                  R                  XU5      n
U R                  XXEU5      n
X4$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.r   r   )r   r,   r   r   z'Cannot duplicate `image` of batch size z to z text prompts.)r,   r8   ri   )rz   r   rD   r<   r   rV   r   r   r   r~   r   r   r   rE   scale_noiser   )rc   r   timestepr   r   r   r   ri   r8   r,   r2   r~   r   r   additional_image_per_promptnoises                   r(   prepare_latents*FluxControlImg2ImgPipeline.prepare_latents  s    i&&3y>Z+GA#i.AQ R&<'gi  c&kd&;&;a&?@ASZD$9$9A$=>?6A99*PQkSX\]S]_em::V:9;KKK4..U.P++A..:@S@STU@V3VZ[3[*48K8KA8N*N'!II}o8S&SYZ[M--a00ZBUBUVWBX5X\]5]9-:M:Ma:P9QQUV`Uaaop  "II}o1=MUT..,,]eL$$W:NX]^((r*   c
                 <   [        U[        R                  5      (       a  OU R                  R	                  XUS9nUR
                  S   n
U
S:X  a  UnOUnUR                  USS9nUR                  XgS9nU(       a!  U	(       d  [        R                  " U/S-  5      nU$ )Nr   r   r   r   r   r   r   )	rz   r   Tensorr_   
preprocessr~   repeat_interleaver   r   )rc   r   r   r   r   rg   r8   ri   do_classifier_free_guidance
guess_modeimage_batch_size	repeat_bys               r(   prepare_image(FluxControlImg2ImgPipeline.prepare_imageH  s     eU\\**((33EPU3VE ;;q>q "I .I''	q'94&zIIugk*Er*   c                     U R                   $ r"   )_guidance_scalerc   s    r(   guidance_scale)FluxControlImg2ImgPipeline.guidance_scalej  s    ###r*   c                     U R                   $ r"   )_joint_attention_kwargsr   s    r(   joint_attention_kwargs1FluxControlImg2ImgPipeline.joint_attention_kwargsn  s    +++r*   c                     U R                   $ r"   )_num_timestepsr   s    r(   num_timesteps(FluxControlImg2ImgPipeline.num_timestepsr  s    """r*   c                     U R                   $ r"   )
_interruptr   s    r(   	interrupt$FluxControlImg2ImgPipeline.interruptv  s    r*   g333333?   g      @pilTcontrol_imager   r   r   r7   r:   r   output_typereturn_dictr   callback_on_step_endr   c                 r   U=(       d    U R                   U R                  -  nU=(       d    U R                   U R                  -  nU R                  UUUUUUUUUS9	  Xl        UU l        SU l        U R                  R                  X5US9nUR                  [        R                  S9nUb  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                   nU R"                  b  U R"                  R%                  SS5      OSnU R'                  UUUUUUUUS	9u  nnnU	c  [(        R*                  " S
SU-  U5      OU	n	[-        U5      U R                  -  S-  [-        U5      U R                  -  S-  -  n[/        UU R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      U R0                  R2                  R%                  SS5      5      n[4        (       a  SnOUn[7        U R0                  UUU	US9u  nnU R9                  XU5      u  nnUS:  a  [;        SU SU S35      eUSS R=                  UU-  5      nU R>                  R2                  R@                  S-  n U RC                  UUUUU-  UUU RD                  RF                  S9nURH                  S:X  a  U RD                  RK                  U5      RL                  RO                  US9nX@RD                  R2                  RP                  -
  U RD                  R2                  RR                  -  nUR                  SS u  n!n"U RU                  UUU-  U U!U"5      nU RW                  UUUU-  U UUURF                  UUU5
      u  nn#[Y        [        U5      XR0                  RZ                  -  -
  S5      n$[        U5      U l.        U R>                  R2                  R^                  (       aE  [        R`                  " S/U
U[        R                  S9n%U%Rc                  UR                  S   5      n%OSn%U Re                  US9 n&[g        U5       GH  u  n'n(U Rh                  (       a  M  [        Rj                  " X/SS9n)U(Rc                  UR                  S   5      R                  URF                  5      n*U R?                  U)U*S -  U%UUUU#U R"                  SS!9	S   n+URF                  n,U R0                  Rm                  U+U(USS"9S   nURF                  U,:w  a>  [        Rn                  Rp                  Rs                  5       (       a  UR                  U,5      nUbJ  0 n-U H  n.[u        5       U.   U-U.'   M     U" U U'U(U-5      n/U/Rw                  S#U5      nU/Rw                  S$U5      nU'[        U5      S-
  :X  d)  U'S-   U$:  a0  U'S-   U R0                  RZ                  -  S:X  a  U&Ry                  5         [4        (       d  GM  [z        R|                  " 5         GM     SSS5        US%:X  a  UnOU R                  XX`R                  5      nXRD                  R2                  RR                  -  U RD                  R2                  RP                  -   nU RD                  R                  USS"9S   nU R                  R                  UUS&9nU R                  5         U(       d  U4$ [        US'9$ ! , (       d  f       N= f)(a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        will be used instead
    image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `list[torch.Tensor]`, `list[PIL.Image.Image]`, or `list[np.ndarray]`):
        `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
        numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
        or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
        list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
        latents as `image`, but if passing latents directly it is not encoded again.
    control_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `list[torch.Tensor]`, `list[PIL.Image.Image]`, `list[np.ndarray]`,:
            `list[list[torch.Tensor]]`, `list[list[np.ndarray]]` or `list[list[PIL.Image.Image]]`):
        The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
        specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
        as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
        width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
        images must be passed as a list such that each element of the list can be correctly batched for input
        to a single ControlNet.
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated image. This is set to 1024 by default for the best results.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated image. This is set to 1024 by default for the best results.
    strength (`float`, *optional*, defaults to 1.0):
        Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
        starting point and more noise is added the higher the `strength`. The number of denoising steps depends
        on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
        process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
        essentially ignores `image`.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    sigmas (`list[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    guidance_scale (`float`, *optional*, defaults to 7.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.FloatTensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.flux.FluxPipelineOutput`] instead of a plain tuple.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`list`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    max_sequence_length (`int` defaults to 512): Maximum sequence length to use with the `prompt`.

Examples:

Returns:
    [`~pipelines.flux.FluxPipelineOutput`] or `tuple`: [`~pipelines.flux.FluxPipelineOutput`] if `return_dict`
    is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the generated
    images.
)rM   r   r   rh   Fr   )ri   Nr   r   scale)rf   r   rM   r   r8   rg   rh   r   g      ?r   base_image_seq_len   max_image_seq_len   r         ?r    ffffff?cpu)r:   r'   z?After adjusting the num_inference_steps by strength parameter: z!, the number of pipelinesteps is z4 which is < 1 and not appropriate for this pipeline.rU   )r   r   r   r   rg   r8   ri   r   r   r   )totalr   i  )	hidden_statesr   guidancepooled_projectionsencoder_hidden_statestxt_idsimg_idsr   r  )r  r2   rM   latent)r  )images)Drb   rV   r   r   r   r   r_   r   r   r   float32rz   r{   r   rD   r~   ry   r   getr   nplinspacer   r)   rE   r]   XLA_AVAILABLErI   r   r<   r   rS   in_channelsr   rN   ri   ndimr   r/   r0   r   r   r   r   r   r   r   guidance_embedsfullexpandprogress_bar	enumerater   r   stepbackendsmpsis_availablelocalspopupdatexm	mark_stepr   decodepostprocessmaybe_free_model_hooksr   )0rc   rf   r   r   r   r   r   r   r7   r:   r   rg   r,   r2   rM   r   r  r  r   r  r   rh   
init_imager   r8   r   r   r$   r'   timestep_devicer9   latent_timestepr   height_control_imagewidth_control_imager   num_warmup_stepsr  r   r   tlatent_model_inputr   
noise_predlatents_dtypecallback_kwargsr   callback_outputss0                                                   r(   __call__#FluxControlImg2ImgPipeline.__call__z  s   h K433d6K6KKI11D4I4II 	'!5/Q 3 	 
	
  .'=$ ))44UQV4W
]]]7
 *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 '!5"7 3!  	
		
  TZSaS!&9"9;NOgmV(=(==Bs5zUYUjUjGjnoGopNN!!%%&:C@NN!!%%&94@NN!!%%lC8NN!!%%k48
 =#O$O);NN*
&	& *.););<O[a)b&	&"QRZQ[ \/00df  $BQ-..z<Q/QR  $//66BBaG**!$99"7((.. + 
 " HHOOM:FFMMXaMbM*XX__-I-IITXX__MkMkkM8E8K8KAB8O5 "5 ..22$$#M %)$8$8.. %
!! s9~0CnnFZFZ0ZZ\]^!)n ""22zz1#~fEMMZHw}}Q'78HH %89\!),1>>%*YY/GQ%O" 88GMM!$4588G!--"4%_%';*7$,+/+F+F % . 
 

 !(..--j!WRW-XYZ[==M1~~))6688")**]";'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNY - :^ ("E **7ECXCXYG!?!??488??C_C__GHHOOGO?BE((44U4TE 	##%8O!//} :9s   F4\(\((
\6)	r   r   r   r   r   rb   r_   ra   rV   )Nr   re   NN)r   N)NNr   NNre   NNNNNr"   )FF)6__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r   r   r   rZ   r{   r   r   r   r8   ri   r   r   FloatTensorfloatr   r   	Generatorr   r   r   staticmethodr   r   r   r   r   propertyr   r   r   r   no_gradr   EXAMPLE_DOC_STRINGr
   booldictr   r   r:  __static_attributes____classcell__)rC   s   @r(   rK   rK      sj   4 M(/:'2' ' $	'
 !' '' %' ,'B #'%&#&&*$(/d3i/  #/ !	/
 t#/ {{T!/j &'&*	*d3i*  #* t#	*` ,0&*%&269=#&#'M=d3iM= S	/D(M= t#	M=
  #M= ((4/M= $//$6M= !M= DLM=`u||  	8$ !+/ 4ol ? ?    4 -)r %* D $ $ , , # #   ]]_12 #'+/$(,0! #%%) #,-DH,0269="' 8<BF9B#&-u0d3iu0 S	/D(u0 "	u0
 *u0 d
u0 Tzu0 u0 !u0 Ud"u0 u0  #Tzu0 ??T%//%::TAu0 ""T)u0 ((4/u0  $//$6!u0" 4Z#u0$ %u0& !%S#X 5'u0( 'Sz4'784?)u0* -1I+u0, !-u0 3 u0r*   rK   )r  r	  r
  r  )Nr0   r<  ):r>   typingr   r   numpyr  r   transformersr   r   r   r   r_   r
   r   loadersr   r   r   models.autoencodersr   models.transformersr   
schedulersr   utilsr   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelr)  r  
get_loggerr=  r   rJ  r   rE  r)   r   rF  r{   r6   r8   r   rI   rK   r#   r*   r(   <module>r^     sW        V V D \ \ 0 9 9  . . / ))MM 
		H	%% V 

 
 	

 
 `h
TLL
T-2__t-C
TY\
T  '+(,"&!%8*t8* %,,%8* Cy4	8*
 K$8*v0!24GI\ 0r*   