
    
3j              
          S SK r S SKJrJr  S SKrS SKJrJrJrJ	r	  SSK
Jr  SSKJrJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJrJrJr  SSKJ r   SSK!J"r"  SSK#J$r$  SSK%J&r&  \" 5       (       a  S SK'J(s  J)r*  Sr+OSr+\RX                  " \-5      r.Sr/    SS\0S-  S\1\Rd                  -  S-  S\3\0   S-  S\3\4   S-  4S jjr5 " S S\"\\\&5      r6g)    N)AnyCallable)CLIPTextModelWithProjectionCLIPTokenizerT5EncoderModelT5TokenizerFast   )VaeImageProcessor)FromSingleFileMixinSD3LoraLoaderMixin)PAGCFGJointAttnProcessor2_0PAGJointAttnProcessor2_0)AutoencoderKL)SD3Transformer2DModel)FlowMatchEulerDiscreteScheduler)USE_PEFT_BACKENDis_torch_xla_availableloggingreplace_example_docstringscale_lora_layersunscale_lora_layers)randn_tensor   )DiffusionPipeline)StableDiffusion3PipelineOutput   )PAGMixinTFa^  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AutoPipelineForText2Image

        >>> pipe = AutoPipelineForText2Image.from_pretrained(
        ...     "stabilityai/stable-diffusion-3-medium-diffusers",
        ...     torch_dtype=torch.float16,
        ...     enable_pag=True,
        ...     pag_applied_layers=["blocks.13"],
        ... )
        >>> pipe.to("cuda")
        >>> prompt = "A cat holding a sign that says hello world"
        >>> image = pipe(prompt, guidance_scale=5.0, pag_scale=0.7).images[0]
        >>> image.save("sd3_pag.png")
        ```
num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr    zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r    r   r!   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r!   r   r    )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r    len)	schedulerr   r   r    r!   kwargsaccepts_timestepsaccept_sigmass           c/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/pag/pipeline_pag_sd_3.pyretrieve_timestepsr2   M   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))    c            9       z  ^  \ rS rSrSrSr/ r/ SQr SFS\S\	S\
S\S	\S
\S\S\S\S\\\   -  4U 4S jjjr     SGS\\\   -  S\S\S\R(                  S-  S\R*                  S-  4
S jjr    SHS\\\   -  S\S\R(                  S-  S\S-  S\4
S jjr             SIS\\\   -  S\\\   -  S\\\   -  S\R(                  S-  S\S\S \\\   -  S-  S!\\\   -  S-  S"\\\   -  S-  S#\R2                  S-  S$\R2                  S-  S%\R2                  S-  S&\R2                  S-  S\S-  S\S'\S-  4 S( jjr         SJS) jr SKS* jr\S+ 5       r\S, 5       r \S- 5       r!\S. 5       r"\S/ 5       r#\S0 5       r$\RJ                  " 5       \&" \'5      SSSSSS1SS2SSSSSSSSSSS3SSSSS4/SS5S64S\\\   -  S\\\   -  S-  S\\\   -  S-  S7\S-  S8\S-  S9\S:\\   S-  S;\S \\\   -  S-  S!\\\   -  S-  S"\\\   -  S-  S\S-  S<\RP                  \\RP                     -  S-  S4\R2                  S-  S#\R2                  S-  S$\R2                  S-  S%\R2                  S-  S&\R2                  S-  S=\S-  S>\S?\)\\*4   S-  S\S-  S@\+\\/S4   S-  SA\\   S\SB\SC\46SD jj5       5       r,SEr-U =r.$ )LStableDiffusion3PAGPipeline   a\  
[PAG pipeline](https://huggingface.co/docs/diffusers/main/en/using-diffusers/pag) for text-to-image generation
using Stable Diffusion 3.

Args:
    transformer ([`SD3Transformer2DModel`]):
        Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    text_encoder ([`CLIPTextModelWithProjection`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant,
        with an additional added projection layer that is initialized with a diagonal matrix with the `hidden_size`
        as its dimension.
    text_encoder_2 ([`CLIPTextModelWithProjection`]):
        [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
        specifically the
        [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
        variant.
    text_encoder_3 ([`T5EncoderModel`]):
        Frozen text-encoder. Stable Diffusion 3 uses
        [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
        [t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl) variant.
    tokenizer (`CLIPTokenizer`):
        Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_2 (`CLIPTokenizer`):
        Second Tokenizer of class
        [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
    tokenizer_3 (`T5TokenizerFast`):
        Tokenizer of class
        [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
z>text_encoder->text_encoder_2->text_encoder_3->transformer->vae)latentsprompt_embedsnegative_prompt_embedsnegative_pooled_prompt_embedstransformerr-   vaetext_encoder	tokenizertext_encoder_2tokenizer_2text_encoder_3tokenizer_3pag_applied_layersc                   > [         TU ]  5         U R                  UUUUUUU	UUS9	  [        U SS 5      (       a/  S[	        U R
                  R                  R                  5      S-
  -  OSU l        [        U R                  S9U l
        [        U S5      (       a#  U R                  b  U R                  R                  OSU l        [        U S	5      (       a-  U R                  b   U R                  R                  R                   OS
U l        [        U S	5      (       a-  U R                  b   U R                  R                  R$                  OSU l        U R'                  U
[)        5       [+        5       4S9  g )N)	r<   r=   r?   rA   r>   r@   rB   r;   r-   r<   r   r      )vae_scale_factorr>   M   r;      )pag_attn_processors)super__init__register_modulesgetattrr,   r<   configblock_out_channelsrF   r
   image_processorhasattrr>   model_max_lengthtokenizer_max_lengthr;   sample_sizedefault_sample_size
patch_sizeset_pag_applied_layersr   r   )selfr;   r-   r<   r=   r>   r?   r@   rA   rB   rC   r+   s              r1   rK   $StableDiffusion3PAGPipeline.__init__   sX    	%))### 	 
	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw0$BWBWX/6t[/I/IdnnNhDNN++np 	!
 t]++0@0@0L ##// 	  3:$2N2NSWScScSoD##..uv 	 	##5P5RTlTn4o 	$ 	
r3   Nr      promptnum_images_per_promptmax_sequence_lengthr   dtypec           	         U=(       d    U R                   nU=(       d    U R                  R                  n[        U[        5      (       a  U/OUn[        U5      nU R                  c9  [        R                  " Xb-  UU R                  R                  R                  4UUS9$ U R                  USUSSSS9nUR                  nU R                  USSS9R                  n	U	R                  S   UR                  S   :  ag  [        R                  " X5      (       dL  U R                  R!                  U	S S 2U R"                  S	-
  S24   5      n
[$        R'                  S
U SU
 35        U R                  UR)                  U5      5      S   nU R                  R                  nUR)                  XTS9nUR                  u  pnUR+                  S	US	5      nUR-                  Xb-  US5      nU$ )Nr   r^   
max_lengthTpt)paddingra   
truncationadd_special_tokensreturn_tensorslongestrc   rf   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: r   r^   r   )_execution_devicer=   r^   
isinstancestrr,   rA   torchzerosr;   rN   joint_attention_dimrB   	input_idsshapeequalbatch_decoderS   loggerwarningtorepeatview)rX   r[   r\   r]   r   r^   
batch_sizetext_inputstext_input_idsuntruncated_idsremoved_textr8   _seq_lens                 r1   _get_t5_prompt_embeds1StableDiffusion3PAGPipeline._get_t5_prompt_embeds   s    14110**00'44&&[
&;;6'$$++??
   && *# ' 
 %..**69UY*Zdd  $(<(<R(@@UcIuIu++88DLeLehiLilnLnIn9opLNN'(	,A
 ++N,=,=f,EFqI##))%((u(D%++A &,,Q0EqI%**:+MwXZ[r3   	clip_skipclip_model_indexc                    U=(       d    U R                   nU R                  U R                  /nU R                  U R                  /nXe   nXu   n	[        U[        5      (       a  U/OUn[        U5      n
U" USU R                  SSS9nUR                  nU" USSS9R                  nUR                  S   UR                  S   :  ag  [        R                  " X5      (       dL  UR                  US S 2U R                  S-
  S24   5      n[        R                  S	U R                   S
U 35        U	" UR!                  U5      SS9nUS   nUc  UR"                  S   nOUR"                  US-   *    nUR!                  U R                  R$                  US9nUR                  u  nnnUR'                  SUS5      nUR)                  X-  US5      nUR'                  SU5      nUR)                  X-  S5      nUU4$ )Nra   Trb   )rc   ra   rd   rf   rg   rh   ri   r   z\The following part of your input was truncated because CLIP can only handle sequences up to rj   )output_hidden_statesr   r   rk   )rl   r>   r@   r=   r?   rm   rn   r,   rS   rr   rs   ro   rt   ru   rv   rw   rx   hidden_statesr^   ry   rz   )rX   r[   r\   r   r   r   clip_tokenizersclip_text_encodersr>   r=   r{   r|   r}   r~   r   r8   pooled_prompt_embedsr   r   s                      r1   _get_clip_prompt_embeds3StableDiffusion3PAGPipeline._get_clip_prompt_embeds  s    1411>>4+;+;<"//1D1DE#5	);'44&&[
 00
 %..#FIdS]]  $(<(<R(@@UcIuIu$11/!TE^E^abEbegEgBg2hiLNN--.i~G %^%6%6v%>UYZ,Q/)77;M)77)a-8HIM%((t/@/@/F/Fv(V%++7A%,,Q0EqI%**:+MwXZ[3::1>ST3889[]_`222r3   Tprompt_2prompt_3do_classifier_free_guidancenegative_promptnegative_prompt_2negative_prompt_3r8   r9   r   r:   
lora_scalec                 V   U=(       d    U R                   nUbx  [        U [        5      (       ac  UU l        U R                  b!  [
        (       a  [        U R                  U5        U R                  b!  [
        (       a  [        U R                  U5        [        U[        5      (       a  U/OUnUb  [        U5      nOU
R                  S   nU
Gc  U=(       d    Un[        U[        5      (       a  U/OUnU=(       d    Un[        U[        5      (       a  U/OUnU R                  UUUUSS9u  nnU R                  UUUUSS9u  nn[        R                  " UU/SS9nU R                  UUUUS9n[        R                  R                   R#                  USUR                  S   UR                  S   -
  45      n[        R                  " UU/SS9n
[        R                  " UU/SS9nU(       Ga  UGc  U=(       d    S	nU=(       d    UnU	=(       d    Un	[        U[        5      (       a  UU/-  OUn[        U[        5      (       a  UU/-  OUn[        U	[        5      (       a  UU	/-  OU	n	Ub;  [%        U5      [%        U5      La$  ['        S
[%        U5       S[%        U5       S35      eU[        U5      :w  a!  [)        SU S[        U5       SU SU S3	5      eU R                  UUUSSS9u  nnU R                  UUUSSS9u  nn[        R                  " UU/SS9nU R                  U	UUUS9n[        R                  R                   R#                  USUR                  S   UR                  S   -
  45      n[        R                  " UU/SS9n[        R                  " UU/SS9nU R                  b6  [        U [        5      (       a!  [
        (       a  [+        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [+        U R                  U5        XX4$ )a  

Args:
    prompt (`str` or `list[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in all text-encoders
    prompt_3 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_3` and `text_encoder_3`. If not defined, `prompt` is
        used in all text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
    negative_prompt_3 (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and
        `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
Nr   )r[   r   r\   r   r   r   ri   dim)r[   r\   r]   r   r    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)r   r\   r   r   )rl   rm   r   _lora_scaler=   r   r   r?   rn   r,   rs   r   ro   catr   nn
functionalpadtype	TypeErrorr$   r   )rX   r[   r   r   r   r\   r   r   r   r   r8   r9   r   r:   r   r]   r   r{   prompt_embedpooled_prompt_embedprompt_2_embedpooled_prompt_2_embedclip_prompt_embedst5_prompt_embednegative_prompt_embednegative_pooled_prompt_embednegative_prompt_2_embednegative_pooled_prompt_2_embednegative_clip_prompt_embedst5_negative_prompt_embeds                                 r1   encode_prompt)StableDiffusion3PAGPipeline.encode_promptO  s   D 1411 !j7I&J&J)D   ,1A1A!$"3"3Z@"".3C3C!$"5"5zB'44&&VJ&,,Q/J )6H%/#%>%>zHH)6H%/#%>%>zHH040L0L&;#!" 1M 1-L- 594P4P&;#!" 5Q 51N1 "'L.+Ir!R"88&;$7	 9 O "'!4!4!8!8"Q(=(=b(ADVD\D\]_D`(`$a" "II'9?&KQSTM#(99.ACX-Y_a#b &+A+I-3O 1 D_ 1 D_ AK?\_@`@`jO+<<fuO4>?PRU4V4V
/00\m  5??PRU4V4V
/00\m  !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  CGB^B^&;!" C_ C?!#? GKFbFb!&;!" Gc GC#%C +0))5JLc4djl*m''+'A'A(&;$7	 (B ($ +0((*=*=*A*A+,22269T9Z9Z[]9^^_+'
 &+YY0KMe/fln%o",1II-/MNTV-) ($ 2338H8H#D$5$5zB*$ 2338H8H#D$7$7D6Jiir3   c                   ^  UT R                   T R                  -  -  S:w  d   UT R                   T R                  -  -  S:w  aj  [        ST R                   T R                  -   SU SU SXDT R                   T R                  -  -  -
   SXUT R                   T R                  -  -  -
   S35      eUbW  [        U 4S jU 5       5      (       d=  [        S	T R                   S
U Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eUb  U	b  [        SU SU	 S35      eUb  U	b  [        SU SU	 S35      eUb  U	b  [        SU SU	 S35      eUc  U	c  [        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [        S[        U5       35      eUb  U
b  [        SU SU
 S35      eUb  U
b  [        SU SU
 S35      eUb  U
b  [        SU SU
 S35      eU	bC  U
b@  U	R                  U
R                  :w  a&  [        SU	R                   SU
R                   S35      eU	b  Uc  [        S5      eU
b  Uc  [        S5      eUb  US:  a  [        SU 35      eg g s  snf )Nr   z-`height` and `width` have to be divisible by z	 but are z and z.You can use height z and width r   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0krX   s     r1   	<genexpr>;StableDiffusion3PAGPipeline.check_inputs.<locals>.<genexpr>+  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.z Cannot forward both `prompt_2`: z Cannot forward both `prompt_3`: zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is z4`prompt_3` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: z)Cannot forward both `negative_prompt_2`: z)Cannot forward both `negative_prompt_3`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` zIf `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`.zIf `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`.i   z8`max_sequence_length` cannot be greater than 512 but is )
rF   rV   r$   allr   rm   rn   listr   rs   )rX   r[   r   r   heightwidthr   r   r   r8   r9   r   r:   "callback_on_step_end_tensor_inputsr]   r   s   `               r1   check_inputs(StableDiffusion3PAGPipeline.check_inputs  s{   $ d++doo=>!C--?@AE?@U@UX\XgXg@g?hhqrxqyy~  @E  F F&&,9N9NQUQ`Q`9`/a&a%bbmns  @D  @U  @U  X\  Xg  Xg  @g  wh  oh  ni  ijk 
 .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  !m&?28*<RS`Ra b0 0  !m&?28*<RS`Ra b0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa!:h+D+DZX`bfMgMgSTXYaTbScdee!:h+D+DZX`bfMgMgSTXYaTbScdee&+A+M9/9J K*++]_  */E/Q;<M;N O*++]_  */E/Q;<M;N O*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  $)=)E U  "-2O2W y  */BS/HWXkWlmnn 0I* pHs    K47K4c	                     Ub  UR                  XeS9$ UU[        U5      U R                  -  [        U5      U R                  -  4n	[        U[        5      (       a*  [        U5      U:w  a  [        S[        U5       SU S35      e[        XXeS9nU$ )Nr`   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)	generatorr   r^   )rx   intrF   rm   r   r,   r$   r   )
rX   r{   num_channels_latentsr   r   r^   r   r   r7   rs   s
             r1   prepare_latents+StableDiffusion3PAGPipeline.prepare_latentsr  s     ::V:99  K4000J$///	
 i&&3y>Z+GA#i.AQ R&<'gi 
 u&Vr3   c                     U R                   $ r   _guidance_scalerX   s    r1   guidance_scale*StableDiffusion3PAGPipeline.guidance_scale  s    ###r3   c                     U R                   $ r   )
_clip_skipr   s    r1   r   %StableDiffusion3PAGPipeline.clip_skip      r3   c                      U R                   S:  $ )Nr   r   r   s    r1   r   7StableDiffusion3PAGPipeline.do_classifier_free_guidance  s    ##a''r3   c                     U R                   $ r   )_joint_attention_kwargsr   s    r1   joint_attention_kwargs2StableDiffusion3PAGPipeline.joint_attention_kwargs  s    +++r3   c                     U R                   $ r   )_num_timestepsr   s    r1   num_timesteps)StableDiffusion3PAGPipeline.num_timesteps  s    """r3   c                     U R                   $ r   )
_interruptr   s    r1   	interrupt%StableDiffusion3PAGPipeline.interrupt  r   r3      g      @pilr7   g      @g        r   r   r   r!   r   r   output_typereturn_dictr   callback_on_step_endr   	pag_scalepag_adaptive_scalec                 ^   U=(       d    U R                   U R                  -  nU=(       d    U R                   U R                  -  nU R                  UUUUUU	U
UUUUUUUS9  Xl        UU l        UU l        SU l        UU l        UU l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                  b  U R                  R!                  SS5      OSnU R#                  UUUU	U
UU R$                  UUUUUU R&                  UUUS9u  nnnnU R(                  (       a;  U R+                  UUU R$                  5      nU R+                  UUU R$                  5      nO?U R$                  (       a.  [,        R.                  " UU/SS9n[,        R.                  " UU/SS9n[0        (       a  S	nOUn[3        U R4                  UUUS
9u  n n[7        [        U 5      X`R4                  R8                  -  -
  S5      n![        U 5      U l        U R<                  R>                  R@                  n"U RC                  UU-  U"UUURD                  UUU5      nU R(                  (       a:  U R<                  RF                  n#U RI                  U RJ                  U R$                  S9  U RM                  US9 n$[O        U 5       GH\  u  n%n&U RP                  (       a  M  [,        R.                  " U/UR                  S   UR                  S   -  -  5      n'U&RS                  U'R                  S   5      n(U R=                  U'U(UUU R                  SS9S   n)U R(                  (       a)  U RU                  U)U R$                  U RV                  U&5      n)O:U R$                  (       a)  U)RY                  S5      u  n*n+U*U RV                  U+U*-
  -  -   n)URD                  n,U R4                  R[                  U)U&USS9S   nURD                  U,:w  a>  [,        R\                  R^                  Ra                  5       (       a  URc                  U,5      nUbn  0 n-U H  n.[e        5       U.   U-U.'   M     U" U U%U&U-5      n/U/Rg                  SU5      nU/Rg                  SU5      nU/Rg                  SU5      nU/Rg                  SU5      nU%[        U 5      S-
  :X  d)  U%S-   U!:  a0  U%S-   U R4                  R8                  -  S:X  a  U$Ri                  5         [0        (       d  GMG  [j        Rl                  " 5         GM_     SSS5        US:X  a  Un0OzXRn                  R>                  Rp                  -  U Rn                  R>                  Rr                  -   nU Rn                  Ru                  USS9S   n0U Rv                  Ry                  U0US9n0U R{                  5         U R(                  (       a  U R<                  R}                  W#5        U(       d  U04$ [        U0S9$ ! , (       d  f       N= f)a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        will be used instead
    prompt_3 (`str` or `list[str]`, *optional*):
        The prompt or prompts to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `prompt` is
        will be used instead
    height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The height in pixels of the generated image. This is set to 1024 by default for the best results.
    width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
        The width in pixels of the generated image. This is set to 1024 by default for the best results.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    sigmas (`list[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    guidance_scale (`float`, *optional*, defaults to 7.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used instead
    negative_prompt_3 (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and
        `text_encoder_3`. If not defined, `negative_prompt` is used instead
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.FloatTensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
        of a plain tuple.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`list`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    max_sequence_length (`int` defaults to 256): Maximum sequence length to use with the `prompt`.
    pag_scale (`float`, *optional*, defaults to 3.0):
        The scale factor for the perturbed attention guidance. If it is set to 0.0, the perturbed attention
        guidance will not be used.
    pag_adaptive_scale (`float`, *optional*, defaults to 0.0):
        The adaptive scale factor for the perturbed attention guidance. If it is set to 0.0, `pag_scale` is
        used.

Examples:

Returns:
    [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] or `tuple`:
    [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] if `return_dict` is True, otherwise a
    `tuple`. When returning a tuple, the first element is a list with the generated images.
)	r   r   r   r8   r9   r   r:   r   r]   FNr   r   scale)r[   r   r   r   r   r   r   r8   r9   r   r:   r   r   r\   r]   r   r   cpu)r!   )rC   r   )total)r   timestepencoder_hidden_statespooled_projectionsr   r   r   )r   r7   r8   r9   r:   latent)r   )images)@rU   rF   r   r   r   r   r   
_pag_scale_pag_adaptive_scalerm   rn   r   r,   rs   rl   r   getr   r   r   do_perturbed_attention_guidance%_prepare_perturbed_attention_guidancero   r   XLA_AVAILABLEr2   r-   maxorderr   r;   rN   in_channelsr   r^   attn_processors_set_pag_attn_processorrC   progress_bar	enumerater   expand#_apply_perturbed_attention_guidancer   chunkstepbackendsmpsis_availablerx   localspopupdatexm	mark_stepr<   scaling_factorshift_factordecoderP   postprocessmaybe_free_model_hooksset_attn_processorr   )1rX   r[   r   r   r   r   r   r!   r   r   r   r   r\   r   r7   r8   r9   r   r:   r   r   r   r   r   r   r]   r   r   r{   r   r   timestep_devicer    num_warmup_stepsr   original_attn_procr   itlatent_model_inputr   
noise_prednoise_pred_uncondnoise_pred_textlatents_dtypecallback_kwargsr   callback_outputsimages1                                                    r1   __call__$StableDiffusion3PAGPipeline.__call__  s#   D K433d6K6KKI11D4I4II 	+//'#9!5*G/Q 3 	 	
"  .#'=$##5  *VS"9"9JJvt$<$<VJ&,,Q/J'' ?C>Y>Y>eD''++GT:ko 	 +//(,(H(H'#9!5*Gnn"7 3!!  
	
" )( // FF5t7W7WM $(#M#M$&CTEeEe$  --!II'=}&MSTUM#(99.KMa-bhi#j  =#O$O);NN/*
&	& s9~0CnnFZFZ0ZZ\]^!)n  $//66BB&&.. 	
 //!%!1!1!A!A((#'#:#:,0,L,L )  %89\!),1>> &+YYyM<O<OPQ<RV]VcVcdeVf<f/g%h"88$6$<$<Q$?@!--"4%*7';+/+F+F % .  
 77!%!I!I"D$D$DdFYFY[\"J 559C9I9I!9L6%!2T5H5HO^oLo5p!pJ !(..--j!WRW-XYZ[==M1~~))6688")**]";'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*4D4H4H79V51
 I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNo - :t ("E !?!??488??C_C__GHHOOGO?BE((44U4TE 	##%////0BC8O-U;;Y :9s   IX*X
X,)r   r   r   r   r   r   r   r   rU   rP   rV   rS   rF   )zblocks.1)Nr   rZ   NN)r   NNr   )Nr   TNNNNNNNNrZ   N)	NNNNNNNNNr   )/__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_componentsr   r   r   r   r   r   r   r   rn   r   rK   r   ro   r   r^   r   r   boolFloatTensorfloatr   r   r   propertyr   r   r   r   r   r   no_gradr   EXAMPLE_DOC_STRING	Generatordictr   r   r  __static_attributes____classcell__)r+   s   @r1   r5   r5      s   "H ]u /9*
**
 3*
 	*

 2*
 !*
 4*
 #*
 '*
 %*
  $s)O*
 *
^ #'%&#&&*$(6d3i6  #6 !	6
 t#6 {{T!6x &'&* $ !53d3i53  #53 t#	53
 :53 53z '+%&,026484826;?9=BF $#&#'#jd3ij S	/j S	/	j
 t#j  #j &*j tCy4/j c?T1j c?T1j ((4/j !& 1 1D 8j $//$6j (-'8'84'?j :j  !!j" DL#jR #!&*+/ ^oT > $ $   ( ( , , # #   ]]_12 #'+/+/! #%%) #264848,-DH,026;?9=BF"' 8< $BF9B#&$'9x<d3ix< S	/D(x< S	/D(	x<
 d
x< Tzx< !x< Ud"x< x< tCy4/x< c?T1x< c?T1x<  #Tzx< ??T%//%::TAx< ""T)x<  ((4/!x<" !& 1 1D 8#x<$ $//$6%x<& (-'8'84'?'x<( 4Z)x<* +x<, !%S#X 5-x<. :/x<0 'Sz4'784?1x<2 -1I3x<4 !5x<6 7x<8 "9x< 3 x<r3   r5   )NNNN)7r&   typingr   r   ro   transformersr   r   r   r   rP   r
   loadersr   r   models.attention_processorr   r   models.autoencodersr   models.transformersr   
schedulersr   utilsr   r   r   r   r   r   utils.torch_utilsr   pipeline_utilsr   "stable_diffusion_3.pipeline_outputr   	pag_utilsr   torch_xla.core.xla_modelcore	xla_modelr  r   
get_loggerr  rv   r'  r   rn   r   r   r$  r2   r5   r#   r3   r1   <module>r<     s         1 > _ 0 8 9  . . O  ))MM 
		H	% . '+(,"&!%8*t8* %,,%8* Cy4	8*
 K$8*v^<"35GI\^f ^<r3   