
    
3j7                         S SK Jr  S SKrSSKJrJr  SSKJr  SSKJ	r	J
r
JrJr  SSKJr  SS	KJrJr  \
" 5       (       a  S SKJs  Jr  S
rOSr\R.                  " \5      rSrSS jr " S S\5      rg)    )CallableN   )UNet2DConditionModelVQModel)DDPMScheduler)	deprecateis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutputTFae  
    Examples:
        ```py
        >>> from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline
        >>> import torch

        >>> pipe_prior = KandinskyV22PriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior")
        >>> pipe_prior.to("cuda")
        >>> prompt = "red cat, 4k photo"
        >>> out = pipe_prior(prompt)
        >>> image_emb = out.image_embeds
        >>> zero_image_emb = out.negative_image_embeds
        >>> pipe = KandinskyV22Pipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder")
        >>> pipe.to("cuda")
        >>> image = pipe(
        ...     image_embeds=image_emb,
        ...     negative_image_embeds=zero_image_emb,
        ...     height=768,
        ...     width=768,
        ...     num_inference_steps=50,
        ... ).images
        >>> image[0].save("cat.png")
        ```
c                 n    XS-  -  nXS-  -  S:w  a  US-  nXS-  -  nXS-  -  S:w  a  US-  nX2-  XB-  4$ )Nr   r       )heightwidthscale_factor
new_height	new_widths        p/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.pydownscale_height_and_widthr   >   s[    ?*Ja1$a
q(IQ!#Q	$i&>>>    c                     ^  \ rS rSrSrSr/ SQrS\S\S\	4U 4S jjr
S	 r\S
 5       r\S 5       r\S 5       r\R"                  " 5       \" \5      SSSSSSSSSSS/4S\R(                  \\R(                     -  S\R(                  \\R(                     -  S\S\S\S\S\S\R0                  \\R0                     -  S-  S\R(                  S-  S\S-  S\S\\\/S4   S-  S \\   4S! jj5       5       rS"rU =r$ )#KandinskyV22PipelineH   af  
Pipeline for text-to-image generation using Kandinsky

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    scheduler (`DDIMScheduler` | `DDPMScheduler`):
        A scheduler to be used in combination with `unet` to generate image latents.
    unet ([`UNet2DConditionModel`]):
        Conditional U-Net architecture to denoise the image embedding.
    movq ([`VQModel`]):
        MoVQ Decoder to generate the image from the latents.
z
unet->movq)latentsimage_embedsnegative_image_embedsunet	schedulermovqc                    > [         TU ]  5         U R                  UUUS9  S[        U R                  R
                  R                  5      S-
  -  U l        g )N)r!   r"   r#   r   r   )super__init__register_moduleslenr#   configblock_out_channelsmovq_scale_factor)selfr!   r"   r#   	__class__s       r   r&   KandinskyV22Pipeline.__init__[   sV     	 	 	

 "#s499+;+;+N+N'ORS'S!Tr   c                     Uc  [        XX2S9nO<UR                  U:w  a  [        SUR                   SU 35      eUR                  U5      nXVR                  -  nU$ )N)	generatordevicedtypezUnexpected latents shape, got z, expected )r   shape
ValueErrortoinit_noise_sigma)r,   r3   r2   r1   r0   r   r"   s          r   prepare_latents$KandinskyV22Pipeline.prepare_latentsk   s`    ?"5fZG}}% #A'--P[\a[b!cddjj(G666r   c                     U R                   $ N_guidance_scaler,   s    r   guidance_scale#KandinskyV22Pipeline.guidance_scalev   s    ###r   c                      U R                   S:  $ )Nr   r;   r=   s    r   do_classifier_free_guidance0KandinskyV22Pipeline.do_classifier_free_guidancez   s    ##a''r   c                     U R                   $ r:   )_num_timestepsr=   s    r   num_timesteps"KandinskyV22Pipeline.num_timesteps~   s    """r   i   d   g      @r   NpilTr   r   r    r   r   num_inference_stepsr>   num_images_per_promptr0   output_typereturn_dictcallback_on_step_end"callback_on_step_end_tensor_inputsc           
      V
  ^  UR                  SS5      nUR                  SS5      nUb  [        SSS5        Ub  [        SSS5        UbX  [        U 4S jU 5       5      (       d>  [        ST R                   S	U Vs/ s H  nUT R                  ;  d  M  UPM     sn 35      eT R
                  nUT l        [        U[        5      (       a  [        R                  " US
S9nUR                  S
   U-  n[        U[        5      (       a  [        R                  " US
S9nT R                  (       aX  UR                  US
S9nUR                  US
S9n[        R                  " X!/S
S9R                  T R                  R                   US9nT R"                  R%                  UUS9  T R"                  R&                  nT R                  R(                  R*                  n[-        X4T R.                  5      u  p4T R1                  UUX44UR                   UUU	T R"                  5      n	[3        U5      T l        [7        T R9                  U5      5       GH  u  nnT R                  (       a  [        R                  " U	/S-  5      OU	nSU0nT R                  UUSUSS9S
   nT R                  (       at  UR;                  U	R                  S   SS9u  nnUR=                  S5      u  nnUR=                  S5      u  nnUT R>                  UU-
  -  -   n[        R                  " UU/SS9n[A        T R"                  R(                  S5      (       a$  T R"                  R(                  RB                  S;   d   UR;                  U	R                  S   SS9u  nnT R"                  RE                  UUU	US9S
   n	Ub\  0 n U H  n[G        5       U   U U'   M     U" T UUU 5      n!U!R                  SU	5      n	U!R                  SU5      nU!R                  SU5      nUb-  UU-  S
:X  a$  U[I        T R"                  SS5      -  n"U" U"UU	5        [J        (       d  GM  [L        RN                  " 5         GM     U
S;  a  [        SU
 35      eU
S:X  d  T RP                  RS                  U	SS9S   n#U
S;   aX  U#S -  S -   n#U#RU                  S
S5      n#U#RW                  5       RY                  S
SS!S5      R[                  5       R]                  5       n#U
S":X  a  T R_                  U#5      n#OU	n#T Ra                  5         U(       d  U#4$ [c        U#S#9$ s  snf )$a  
Function invoked when calling the pipeline for generation.

Args:
    image_embeds (`torch.Tensor` or `list[torch.Tensor]`):
        The clip image embeddings for text prompt, that will be used to condition the image generation.
    negative_image_embeds (`torch.Tensor` or `list[torch.Tensor]`):
        The clip image embeddings for negative text prompt, will be used to condition the image generation.
    height (`int`, *optional*, defaults to 512):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 512):
        The width in pixels of the generated image.
    num_inference_steps (`int`, *optional*, defaults to 100):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 4.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
        of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
        `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
        the text `prompt`, usually at the expense of lower image quality.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`list`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`
callbackNcallback_stepsz1.0.0zhPassing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`znPassing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`c              3   @   >#    U  H  oTR                   ;   v   M     g 7fr:   )_callback_tensor_inputs).0kr,   s     r   	<genexpr>0KandinskyV22Pipeline.__call__.<locals>.<genexpr>   s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found r   )dim)r2   r1   )r1   r   r   F)sampletimestepencoder_hidden_statesadded_cond_kwargsrL   r   variance_type)learnedlearned_range)r0   r   r    order)ptnprH   latentzIOnly the output types `pt`, `pil` and `np` are supported not output_type=rc   T)force_not_quantizerY   )rb   rH   g      ?r   rH   )images)2popr   allr4   rS   _execution_devicer<   
isinstancelisttorchcatr3   rA   repeat_interleaver5   r!   r2   r"   set_timesteps	timestepsr)   in_channelsr   r+   r7   r(   rD   	enumerateprogress_barsplitchunkr>   hasattrr]   steplocalsgetattrXLA_AVAILABLExm	mark_stepr#   decodeclampcpupermutefloatnumpynumpy_to_pilmaybe_free_model_hooksr   )$r,   r   r    r   r   rI   r>   rJ   r0   r   rK   rL   rM   rN   kwargsrP   rQ   rU   r1   
batch_sizero   num_channels_latentsitlatent_model_inputr\   
noise_predvariance_prednoise_pred_uncondnoise_pred_text_variance_pred_textcallback_kwargscallback_outputsstep_idximages$   `                                   r   __call__KandinskyV22Pipeline.__call__   s   L ::j$/$4d;z
 %  A .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  bc  ko  kG  kG  bGpq  |^  pH  oI  J  ''-lD)) 99\q9L!''*-BB
+T22$)II.C$K!++'99:OUV9WL$9$K$KLagh$K$i! 99&;%JPQRUUiioof V L 	$$%8$HNN,,	#yy//;;26$BXBXY &&-v=NN
 ")nd//	:;DAq=A=]=]G9q=!9cj!/ >)&*"3! #  J //,6,<,<W]]1=MST,<,U)
M5?5E5Ea5H2!?(5(;(;A(>%%.1D1DZkHk1ll
"YY
4F'GQO
 --??NN))77;WW * 0 0q1Aq 0 I
A nn))#	 * 
 G $/"$;A)/!OA& <#7aO#T *..y'B/33NLQ(8(<(<=TVk(l%#N(:a(? CC1g.}c <f ;;hithuvwwh&II$$W$FxPEm+c)Aq)		++Aq!Q7==?EEGe#))%0E##%8O"%00_ pHs   :T&T&)r<   rD   r+   )__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqrS   r   r   r   r&   r7   propertyr>   rA   rE   rk   no_gradr   EXAMPLE_DOC_STRINGTensorrj   intr   	Generatorstrboolr   r   __static_attributes____classcell__)r-   s   @r   r   r   H   s    )RU"U !U 	U 	 $ $ ( ( # # ]]_12
 #& #%&DH'+"' BF9BG1llT%,,%77G1  %||d5<<.@@G1 	G1
 G1 !G1 G1  #G1 ??T%//%::TAG1 $G1 4ZG1 G1 'Sz4'784?G1 -1IG1 3 G1r   r   )   )typingr   rk   modelsr   r   
schedulersr   utilsr   r	   r
   r   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelrz   ry   
get_loggerr   loggerr   r   r   r   r   r   <module>r      sf      3 ' Z Z - C ))MM			H	% 4?C1, C1r   