
    
3jf                         S SK Jr  S SKrS SKJrJr  SSKJr  SSKJ	r	  SSK
JrJrJrJr  SSKJr  S	S
KJr  S	SKJrJrJr  \" 5       (       a  S SKJs  Jr  SrOSr\R8                  " \5      rSr " S S\\5      r g)    )CallableN)CLIPTextModelWithProjectionCLIPTokenizer   )StableCascadeUNet)DDPMWuerstchenScheduler)is_torch_versionis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )PaellaVQModel)DeprecatedPipelineMixinDiffusionPipelineImagePipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import StableCascadePriorPipeline, StableCascadeDecoderPipeline

        >>> prior_pipe = StableCascadePriorPipeline.from_pretrained(
        ...     "stabilityai/stable-cascade-prior", torch_dtype=torch.bfloat16
        ... ).to("cuda")
        >>> gen_pipe = StableCascadeDecoderPipeline.from_pretrain(
        ...     "stabilityai/stable-cascade", torch_dtype=torch.float16
        ... ).to("cuda")

        >>> prompt = "an image of a shiba inu, donning a spacesuit and helmet"
        >>> prior_output = pipe(prompt)
        >>> images = gen_pipe(prior_output.image_embeddings, prompt=prompt)
        ```
c            $       $  ^  \ rS rSrSrSrSrSrSr/ SQr	 S-S\
S\S\S	\S
\S\SS4U 4S jjjrS r      S.S\R&                  S-  S\R&                  S-  S\R&                  S-  S\R&                  S-  4S jjr    S/S jr\S 5       r\S 5       r\S 5       rS r\R6                  " 5       \" \5      SSSSSSSSSSSSSSS/4S \R&                  \\R&                     -  S!\\\   -  S"\ S#\S$\\\   -  S-  S\R&                  S-  S\R&                  S-  S\R&                  S-  S\R&                  S-  S%\ S&\RB                  \\RB                     -  S-  S\R&                  S-  S'\S-  S(\"S)\#\ \ /S4   S-  S*\\   4 S+ jj5       5       r$S,r%U =r&$ )0StableCascadeDecoderPipeline:   a  
Pipeline for generating images from the Stable Cascade model.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

Args:
    tokenizer (`CLIPTokenizer`):
        The CLIP tokenizer.
    text_encoder (`CLIPTextModelWithProjection`):
        The CLIP text encoder.
    decoder ([`StableCascadeUNet`]):
        The Stable Cascade decoder unet.
    vqgan ([`PaellaVQModel`]):
        The VQGAN model.
    scheduler ([`DDPMWuerstchenScheduler`]):
        A scheduler to be used in combination with `prior` to generate image embedding.
    latent_dim_scale (float, `optional`, defaults to 10.67):
        Multiplier to determine the VQ latent space size from the image embeddings. If the image embeddings are
        height=24 and width=24, the VQ latent shape needs to be height=int(24*10.67)=256 and
        width=int(24*10.67)=256 in order to match the training conditions.
z0.35.2decodertext_encoderztext_encoder->decoder->vqgan)latentsprompt_embeds_poolednegative_prompt_embedsimage_embeddings	tokenizer	schedulervqganlatent_dim_scalereturnNc                 f   > [         TU ]  5         U R                  UUUUUS9  U R                  US9  g )N)r   r   r   r   r   )r   )super__init__register_modulesregister_to_config)selfr   r   r   r   r   r   	__class__s          t/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/stable_cascade/pipeline_stable_cascade.pyr#   %StableCascadeDecoderPipeline.__init__^   sF     	% 	 	
 	1AB    c	                 `   UR                   u  ppX-  S[        XR                  R                  -  5      [        XR                  R                  -  5      4nUc  [	        XXTS9nO<UR                   U:w  a  [        SUR                    SU 35      eUR                  U5      nXxR                  -  nU$ )N   )	generatordevicedtypezUnexpected latents shape, got z, expected )shapeintconfigr   r   
ValueErrortoinit_noise_sigma)r&   
batch_sizer   num_images_per_promptr/   r.   r-   r   r   _channelsheightwidthlatents_shapes                 r(   prepare_latents,StableCascadeDecoderPipeline.prepare_latentsq   s     &6%;%;"V.55564445	
 ?"=fbG}}- #A'--P[\i[j!klljj(G666r*   prompt_embedsr   r   negative_prompt_embeds_pooledc                    UGc  U R                  USU R                   R                  SSS9nUR                  nUR                  nU R                  USSS9R                  nUR                  S   UR                  S   :  a  [
        R                  " X5      (       d  U R                   R                  US S 2U R                   R                  S-
  S24   5      n[        R                  S	U R                   R                   S
U 35        US S 2S U R                   R                  24   nUS S 2S U R                   R                  24   nU R                  UR                  U5      UR                  U5      SS9nUR                  S   nUc  UR                  R                  S5      nUR                  U R                  R                  US9nUR                  U R                  R                  US9nUR!                  USS9nUR!                  USS9nU	Gc,  U(       Ga$  Uc  S/U-  nO[#        U5      [#        U5      La$  [%        S[#        U5       S[#        U5       S35      e['        U[(        5      (       a  U/nO2U[+        U5      :w  a!  [-        SU S[+        U5       SU SU S3	5      eUnU R                  USU R                   R                  SSS9nU R                  UR                  R                  U5      UR                  R                  U5      SS9nUR                  S   n	UR                  R                  S5      n
U(       a  U	R                  S   nU	R                  U R                  R                  US9n	U	R/                  SUS5      n	U	R1                  X#-  US5      n	U
R                  S   nU
R                  U R                  R                  US9n
U
R/                  SUS5      n
U
R1                  X#-  US5      n
XxX4$ )N
max_lengthTpt)paddingrB   
truncationreturn_tensorslongest)rD   rF      z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: )attention_maskoutput_hidden_states)r/   r.   r   dim z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)r   model_max_length	input_idsrJ   r0   torchequalbatch_decodeloggerwarningr   r4   hidden_statestext_embeds	unsqueezer/   repeat_interleavetype	TypeError
isinstancestrlenr3   repeatview)r&   r.   r6   r7   do_classifier_free_guidancepromptnegative_promptr?   r   r   r@   text_inputstext_input_idsrJ   untruncated_idsremoved_texttext_encoder_outputuncond_tokensuncond_input*negative_prompt_embeds_text_encoder_outputseq_lens                        r(   encode_prompt*StableCascadeDecoderPipeline.encode_prompt   s`     ..$>>::# ) K )22N(77N"nnVYW[n\ffO$$R(N,@,@,DDU[[N N  $~~::#At~~'F'F'JR'O$OP  778	,Q "03TT^^5T5T3T0T!U!/3TT^^5T5T3T0T!U"&"3"3!!&).:K:KF:Sjn #4 # 0==bAM#+':'F'F'P'PQR'S$%((t/@/@/F/Fv(V366T=N=N=T=T]c6d%778MST7U3EEF[abEc!).I&!#z 1fT/%::UVZ[jVkUl mV~Q(  OS11!0 1s?33 )/)::J3K_J` ax/
| <33  !0>>$>>::# * L :>9J9J&&))&1+::==fE%) :K :6 &P%]%]^`%a",V,b,b,l,lmn,o)&,2215G%;%>%>TEVEVE\E\ek%>%l"%;%B%B1F[]^%_"%;%@%@Acelnp%q"399!<G,I,L,L''--f -M -) -J,P,PQRTikl,m),I,N,N2GR-)
 4Jiir*   c           
      ^  ^  UbW  [        U 4S jU 5       5      (       d=  [        ST R                   SU Vs/ s H  ofT R                  ;  d  M  UPM     sn 35      eUb  Ub  [        SU SU S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Ub  [        S	U S
U S35      eUbE  UbA  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eg g g s  snf )Nc              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr&   s     r(   	<genexpr><StableCascadeDecoderPipeline.check_inputs.<locals>.<genexpr>   s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` rO   )allr3   rs   r]   r^   listr[   r0   )r&   rc   rd   r?   r   "callback_on_step_end_tensor_inputsru   s   `      r(   check_inputs)StableCascadeDecoderPipeline.check_inputs   s    .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa&+A+M9/9J K*++]_ 
 $)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$+ pHs   D*D*c                     U R                   $ rr   _guidance_scaler&   s    r(   guidance_scale+StableCascadeDecoderPipeline.guidance_scale  s    ###r*   c                      U R                   S:  $ )NrI   r~   r   s    r(   rb   8StableCascadeDecoderPipeline.do_classifier_free_guidance  s    ##a''r*   c                     U R                   $ rr   )_num_timestepsr   s    r(   num_timesteps*StableCascadeDecoderPipeline.num_timesteps"  s    """r*   c                    [         R                  " S/5      nSS/n[         R                  " USU-   -  [         R                  -  S-  5      S-  nX!   nUR                  " U6 nUR                  UR                  5      UR                  UR                  5      pSXe-  S-  R                  5       [         R                  S-  -  SU-   -  U-
  nU$ )NgMb?r   rI   g      ?r   )rR   tensorcospiclampr4   r.   acos)r&   talphas_cumprodsclamp_rangemin_varvarratios           r(   get_timestep_ratio_conditioning<StableCascadeDecoderPipeline.get_timestep_ratio_conditioning&  s    LL%!!f))AQK%((2S89Q>ii%TT#**%wzz#**'=7=S(..0EHHsNCANQRRr*   
   g        rI   pilTr   r   rc   num_inference_stepsr   rd   r7   r-   output_typereturn_dictcallback_on_step_endrz   c                    U R                   nU R                  R                  nX@l        [	        SS5      (       a  U[
        R                  :X  a  [        S5      eU R                  UUUUUS9  [        U[        5      (       a  [
        R                  " USS9nUb  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nXR                  S   U-  -  n
Uc*  Uc'  U R                  UUUU
U R                   UUUUU	S	9
u  nnnn	U R                   (       a  [
        R                  " Xy/5      OUnU R                   (       a,  [
        R                  " U[
        R"                  " U5      /5      OUnU R$                  R'                  UUS
9  U R$                  R(                  nU R+                  UXUUXU R$                  5      n[        U R$                  [,        5      (       a  USS nOz[/        U R$                  R0                  S5      (       aU  U R$                  R0                  R2                  (       a0  SU R$                  R0                  l        [4        R7                  S5        [/        U R$                  S5      (       a/  SU R$                  R8                  -
  n[
        R:                  " USS9nO/ n[        U5      U l        [?        U RA                  U5      5       GH  u  nn[        U R$                  [,        5      (       d  [        U5      S:  am  U RC                  URE                  5       RG                  5       U5      nURI                  URK                  S5      5      RM                  U5      RM                  U5      nOURO                  5       RQ                  U R$                  R(                  S   5      RI                  URK                  S5      5      RM                  U5      nO/URI                  URK                  S5      5      RM                  U5      nU R                  U R                   (       a  [
        R                  " U/S-  5      OUU R                   (       a  [
        R                  " U/S-  5      OUUUSS9S   nU R                   (       a6  URS                  S5      u  nn[
        RT                  " UUU RV                  5      n[        U R$                  [,        5      (       d  UnU R$                  RY                  UUUUS9RZ                  nUb\  0 nU H  n []        5       U    UU '   M     U" U UUU5      n!U!R_                  SU5      nU!R_                  SU5      nU!R_                  SU5      n[`        (       d  GM  [b        Rd                  " 5         GM     US;  a  [        SU 35      eUS:X  d  U Rf                  R0                  Rh                  U-  nU Rf                  Rk                  U5      Rl                  Ro                  SS5      n"US:X  a?  U"Rq                  SSSS5      RG                  5       RO                  5       Rs                  5       n"OXUS:X  aO  U"Rq                  SSSS5      RG                  5       RO                  5       Rs                  5       n"U Ru                  U"5      n"OUn"U Rw                  5         U(       d  U"$ [y        U"5      $ )aW  
Function invoked when calling the pipeline for generation.

Args:
    image_embedding (`torch.Tensor` or `list[torch.Tensor]`):
        Image Embeddings either extracted from an image or generated by a Prior Model.
    prompt (`str` or `list[str]`):
        The prompt or prompts to guide the image generation.
    num_inference_steps (`int`, *optional*, defaults to 12):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 0.0):
        Guidance scale as defined in [Classifier-Free Diffusion
        Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
        equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
        setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
        closely linked to the text `prompt`, usually at the expense of lower image quality.
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
        if `decoder_guidance_scale` is less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    prompt_embeds_pooled (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    negative_prompt_embeds_pooled (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds_pooled will be generated from `negative_prompt`
        input argument.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
        (`np.array`) or `"pt"` (`torch.Tensor`).
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`list`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.

Examples:

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple` [`~pipelines.ImagePipelineOutput`] if `return_dict` is True,
    otherwise a `tuple`. When returning a tuple, the first element is a list with the generated image
    embeddings.
<z2.2.0zW`StableCascadeDecoderPipeline` requires torch>=2.2.0 when using `torch.bfloat16` dtype.)rd   r?   r   rz   r   rL   NrI   )
rc   r.   r6   r7   rb   rd   r?   r   r   r@   )r.   rH   clip_sampleFz set `clip_sample` to be Falsebetasg      ?r   )sampletimestep_ratioclip_text_pooledeffnetr   )model_outputtimestepr   r-   r   r?   r   )rC   npr   latentzSOnly the output types `pt`, `np`, `pil` and `latent` are supported not output_type=r   r   r   r   )=_execution_devicer   r/   r   r	   rR   bfloat16r3   r{   r]   ry   catr^   r_   r0   rn   rb   
zeros_liker   set_timesteps	timestepsr=   r   hasattrr2   r   rU   rV   r   cumprodr   	enumerateprogress_barr   longcpuexpandsizer4   floatdivchunklerpr   stepprev_samplelocalspopXLA_AVAILABLExm	mark_stepr   scale_factordecoder   r   permutenumpynumpy_to_pilmaybe_free_model_hooksr   )#r&   r   rc   r   r   rd   r?   r   r   r@   r7   r-   r   r   r   r   rz   r.   r/   r6   r8   r   r   alphasr   ir   r   predicted_latentspredicted_latents_textpredicted_latents_uncondcallback_kwargsru   callback_outputsimagess#                                      r(   __call__%StableCascadeDecoderPipeline.__call__0  s   p ''""-C))eu~~.Evww 	+'#9/Q 	 	
 &--$yy)9qA*VS"9"9JJvt$<$<VJ&,,Q/J !69O9OPQ9RV`9` a  %;%CHLHZHZ%&;,0,L,L /+%9'=.K I[ IEA#Q(E  // II+KL% 	 // II')9)9:J)KLM! 	 	$$%8$HNN,,	 &&(PYdhdrdr
 dnn&=>>!#2It~~,,m<<AVAVAbAb49%%1?@ 4>>7++4>>///F"]]6q9NN!)nd//	:;DAqdnn.EFF~&*%)%I%I!&&(,,.Zh%iN%3%:%:7<<?%K%N%Nu%U%X%XY_%`N%&WWY]]4>>3K3KB3O%P%W%WX_XdXdefXg%h%k%klq%rN!"',,q/!:!=!=e!D !%373S3Suyy'Q/Y`BFBbBbuyy.)9A)=>hv!5! !- ! ! //CTCZCZ[\C]@&(@$)JJ/GI_aeatat$u! dnn.EFF!"nn)).'#	 * 
 k  $/"$;A)/!OA& <#7aO#T *..y'B 0 4 4_m T)9)=)=>VXn)o&}[ <^ ;;efqers  h&jj''44w>GZZ&&w/66<<QBFd"1a3779??AGGI%1a3779??AGGI**62F 	##%M"6**r*   )r   r   )gףp=
W%@)NNNNNN)NNNN)'__name__
__module____qualname____firstlineno____doc___last_supported_version	unet_nametext_encoder_namemodel_cpu_offload_seqrs   r   r   r   r   r   r   r#   r=   rR   Tensorrn   r{   propertyr   rb   r   r   no_gradr   EXAMPLE_DOC_STRINGry   r^   r1   	Generatorboolr   r   __static_attributes____classcell__)r'   s   @r(   r   r   :   s   . 'I&: #(C"C !C 2	C
 +C C  C 
C C&6 -1486:=Aij ||d*ij $llT1ij !&t 3ij (-||d':ij\ #+/'R $ $ ( ( # # ]]_12 #'#% #26-1486:=A%&DH'+"' BF9B#k+,,ell);;k+ d3ik+ !	k+
 k+ tCy4/k+ ||d*k+ $llT1k+ !&t 3k+ (-||d':k+  #k+ ??T%//%::TAk+ $k+ 4Zk+ k+  'Sz4'784?!k+" -1I#k+ 3 k+r*   r   )!typingr   rR   transformersr   r   modelsr   
schedulersr   utilsr	   r
   r   r   utils.torch_utilsr   .deprecated.wuerstchen.modeling_paella_vq_modelr   pipeline_utilsr   r   r   torch_xla.core.xla_modelcore	xla_modelr   r   
get_loggerr   rU   r   r    r*   r(   <module>r      sm      C ' 1 a a - J \ \ ))MM			H	% (c+#:<M c+r*   