
    
3jg              
       t   S SK r S SKJrJr  S SKrS SKJrJr  SSKJ	r	  SSK
JrJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJr  SSKJr  \R8                  " \5      rSr    SS\ S\ S\!S\!4S jjr"    SS\ S-  S\#\RH                  -  S-  S\%\    S-  S\%\!   S-  4S jjr& " S S\\\5      r'g)    N)AnyCallable)AutoTokenizerPreTrainedModel   )VaeImageProcessor)FromSingleFileMixinZImageLoraLoaderMixin)AutoencoderKL)ZImageTransformer2DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)loggingreplace_example_docstring)randn_tensor   )ZImagePipelineOutputut  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import ZImagePipeline

        >>> pipe = ZImagePipeline.from_pretrained("Z-a-o/Z-Image-Turbo", torch_dtype=torch.bfloat16)
        >>> pipe.to("cuda")

        >>> # Optionally, set the attention backend to flash-attn 2 or 3, default is SDPA in PyTorch.
        >>> # (1) Use flash attention 2
        >>> # pipe.transformer.set_attention_backend("flash")
        >>> # (2) Use flash attention 3
        >>> # pipe.transformer.set_attention_backend("_flash_3")

        >>> prompt = "一幅为名为“造相「Z-IMAGE-TURBO」”的项目设计的创意海报。画面巧妙地将文字概念视觉化：一辆复古蒸汽小火车化身为巨大的拉链头，正拉开厚厚的冬日积雪，展露出一个生机盎然的春天。"
        >>> image = pipe(
        ...     prompt,
        ...     height=1024,
        ...     width=1024,
        ...     num_inference_steps=9,
        ...     guidance_scale=0.0,
        ...     generator=torch.Generator("cuda").manual_seed(42),
        ... ).images[0]
        >>> image.save("zimage.png")
        ```
base_seq_lenmax_seq_len
base_shift	max_shiftc                 4    XC-
  X!-
  -  nX5U-  -
  nX-  U-   nU$ N )image_seq_lenr   r   r   r   mbmus           f/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/z_image/pipeline_z_image.pycalculate_shiftr    @   s3     
	K$>?A%%A		Q	BI    num_inference_stepsdevice	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr$   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r$   r#   r%   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r%   r#   r#   r   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r$   len)	schedulerr"   r#   r$   r%   kwargsaccepts_timestepsaccept_sigmass           r   retrieve_timestepsr4   N   s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r!   c            +         ^  \ rS rSrSr/ rSS/rS\S\S\	S\
S	\4
U 4S
 jjr      S3S\\\   -  S\R                   S-  S\S\\\   -  S-  S\\R$                     S-  S\R$                  S-  S\4S jjr   S4S\\\   -  S\R                   S-  S\\R$                     S-  S\S\\R$                     4
S jjr S5S jr\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\R:                  " 5       \" \5      SSSSSSSS SS!SSSSS"SSSS/S4S\\\   -  S#\S-  S$\S-  S%\S&\\    S-  S'\ S(\S)\ S\\\   -  S-  S*\S-  S+\RB                  \\RB                     -  S-  S\R$                  S-  S\\R$                     S-  S\\R$                     S-  S,\S-  S-\S.\"\\#4   S-  S/\$\\/S4   S-  S0\\   S\4(S1 jj5       5       r%S2r&U =r'$ )6ZImagePipeline   ztext_encoder->transformer->vaelatentsprompt_embedsr0   vaetext_encoder	tokenizertransformerc                 &  > [         TU ]  5         U R                  UUUUUS9  [        U S5      (       a<  U R                  b/  S[        U R                  R                  R                  5      S-
  -  OSU l        [        U R                  S-  S9U l
        g )N)r:   r;   r<   r0   r=   r:      r      )vae_scale_factor)super__init__register_moduleshasattrr:   r/   configblock_out_channelsrA   r   image_processor)selfr0   r:   r;   r<   r=   r.   s         r   rC   ZImagePipeline.__init__   s     	%# 	 	
 CJ$PUBVBV[_[c[c[oA#dhhoo889A=>uv 	  1$BWBWZ[B[\r!   NT   promptr#   do_classifier_free_guidancenegative_promptnegative_prompt_embedsmax_sequence_lengthc                 :   [        U[        5      (       a  U/OUnU R                  UUUUS9nU(       a_  Uc  U Vs/ s H  nSPM     nnO[        U[        5      (       a  U/OUn[        U5      [        U5      :X  d   eU R                  UUUUS9nXV4$ / nXV4$ s  snf )N)rL   r#   r9   rP    )
isinstancestr_encode_promptr/   )	rI   rL   r#   rM   rN   r9   rO   rP   _s	            r   encode_promptZImagePipeline.encode_prompt   s     (44&&++' 3	 , 
 '&/5"6v!2v"67A/SV7W7W?"3]lv;#o"6666%)%8%8&4$7	 &9 &" 44 &("44 #7s   Breturnc                 2   U=(       d    U R                   nUb  U$ [        U[        5      (       a  U/n[        U5       H+  u  pVSUS./nU R                  R                  USSSS9nXaU'   M-     U R	                  USUSSS9nUR                  R                  U5      n	UR                  R                  U5      R                  5       n
U R                  U	U
SS	9R                  S
   n/ n[        [        U5      5       H  nUR                  X5   X      5        M     U$ )Nuser)rolecontentFT)tokenizeadd_generation_promptenable_thinking
max_lengthpt)paddingra   
truncationreturn_tensors)	input_idsattention_maskoutput_hidden_states)_execution_devicerS   rT   	enumerater<   apply_chat_templaterf   torg   boolr;   hidden_statesranger/   append)rI   rL   r#   r9   rP   iprompt_itemmessagestext_inputstext_input_idsprompt_masksembeddings_lists               r   rU   ZImagePipeline._encode_prompt   sF    1411$  fc""XF'/NAK8H ..<<&* $	 = K $1I 0 nn * % 
 %..11&9"1144V<AAC))$'!% * 
 -	 s=)*A""=#3LO#DE + r!   c	                    S[        U5      U R                  S-  -  -  nS[        U5      U R                  S-  -  -  nXX44n	Uc  [        XXeS9nU$ UR                  U	:w  a  [	        SUR                   SU	 35      eUR                  U5      nU$ )Nr?   )	generatorr#   dtypezUnexpected latents shape, got z, expected )intrA   r   shaper'   rm   )
rI   
batch_sizenum_channels_latentsheightwidthr|   r#   r{   r8   r~   s
             r   prepare_latentsZImagePipeline.prepare_latents   s     c&kd&;&;a&?@ASZD$9$9A$=>?6A?"5fZG
  }}% #A'--P[\a[b!cddjj(Gr!   c                     U R                   $ r   _guidance_scalerI   s    r   guidance_scaleZImagePipeline.guidance_scale  s    ###r!   c                      U R                   S:  $ )Nr   r   r   s    r   rM   *ZImagePipeline.do_classifier_free_guidance  s    ##a''r!   c                     U R                   $ r   )_joint_attention_kwargsr   s    r   joint_attention_kwargs%ZImagePipeline.joint_attention_kwargs  s    +++r!   c                     U R                   $ r   )_num_timestepsr   s    r   num_timestepsZImagePipeline.num_timesteps  s    """r!   c                     U R                   $ r   )
_interruptr   s    r   	interruptZImagePipeline.interrupt!  s    r!   2   g      @Fg      ?r   pilr   r   r"   r%   r   cfg_normalizationcfg_truncationnum_images_per_promptr{   output_typereturn_dictr   callback_on_step_end"callback_on_step_end_tensor_inputsc           
      V   U=(       d    SnU=(       d    SnU R                   S-  nUU-  S:w  a  [        SU SU SU S35      eUU-  S:w  a  [        SU SU S	U S35      eU R                  nX`l        UU l        S
U l        Xpl        Xl        Ub  [        U[        5      (       a  SnO/Ub!  [        U[        5      (       a  [        U5      nO[        U5      nUb#  Uc   U R                  (       a  Uc  [        S5      eO"U R                  UU	U R                  UUUUS9u  nnU R                  R                  nU R!                  UU
-  UUU["        R$                  UUU5      nU
S:  af  U VVs/ s H  n['        U
5        H  nUPM     M     nnnU R                  (       a.  U(       a'  U VVs/ s H  n['        U
5        H  nUPM     M     nnnUU
-  nUR(                  S   S-  UR(                  S   S-  -  n[+        UU R,                  R.                  R1                  SS5      U R,                  R.                  R1                  SS5      U R,                  R.                  R1                  SS5      U R,                  R.                  R1                  SS5      5      nSU R,                  l        SU0n[5        U R,                  UU4SU0UD6u  n n[7        [        U 5      X@R,                  R8                  -  -
  S5      n![        U 5      U l        U R,                  R=                  S5        U R                  (       aK  U R                  b>  [?        U R                  5      S::  a%  SU R?                  5       -
  S-  RA                  5       n"OSn"U RC                  US9 n#[E        U 5       GH  u  n$n%U RF                  (       a  M  U%RI                  UR(                  S   5      n&SU&-
  S-  n&U RJ                  n'U"b  U"U$   U R                  :  a  Sn'U R                  =(       a    U'S:  n(U((       aO  URM                  U R                  RN                  5      n)U)RQ                  SSSS5      n*X-   n+U&RQ                  S5      n,O)URM                  U R                  RN                  5      n*Un+U&n,U*RS                  S5      n*[        U*RU                  SS95      n-U R                  U-U,U+S
S9S   n.U((       Ga  U.SU n/U.US n0/ n1['        U5       H  n2U/U2   R?                  5       n3U0U2   R?                  5       n4U3U'U3U4-
  -  -   n5U R                  (       a}  [?        U R                  5      S:  ad  ["        RV                  RY                  U35      n6["        RV                  RY                  U55      n7U6[?        U R                  5      -  n8U7U8:  a  U5U8U7-  -  n5U1R[                  U55        M     ["        R\                  " U1SS9n1O4["        R\                  " U. V%s/ s H  n%U%R?                  5       PM     sn%SS9n1U1R_                  S5      n1U1* n1U R,                  Ra                  U1RM                  ["        R$                  5      W%US
S9S   nURN                  ["        R$                  :X  d   eUb\  0 n9U H  n:[c        5       U:   U9U:'   M     U" U U$U%U95      n;U;Re                  SU5      nU;Re                  S U5      nU;Re                  S!U5      nU$[        U 5      S-
  :X  d/  U$S-   U!:  d  GMf  U$S-   U R,                  R8                  -  S:X  d  GM  U#Rg                  5         GM     SSS5        US":X  a  Un<OURM                  U Rh                  RN                  5      nXRh                  R.                  Rj                  -  U Rh                  R.                  Rl                  -   nU Rh                  Ro                  US
S9S   n<U Rp                  Rs                  U<US#9n<U Ru                  5         U(       d  U<4$ [w        U<S$9$ s  snnf s  snnf s  sn%f ! , (       d  f       N= f)%a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
        instead.
    height (`int`, *optional*, defaults to 1024):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to 1024):
        The width in pixels of the generated image.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    sigmas (`list[float]`, *optional*):
        Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
        their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
        will be used.
    guidance_scale (`float`, *optional*, defaults to 5.0):
        Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
        `guidance_scale` is defined as `w` of equation 2. of [Imagen
        Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
        1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
        usually at the expense of lower image quality.
    cfg_normalization (`bool`, *optional*, defaults to False):
        Whether to apply configuration normalization.
    cfg_truncation (`float`, *optional*, defaults to 1.0):
        The truncation value for configuration.
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
        to make generation deterministic.
    latents (`torch.FloatTensor`, *optional*):
        Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor will be generated by sampling using the supplied random `generator`.
    prompt_embeds (`list[torch.FloatTensor]`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`list[torch.FloatTensor]`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generate image. Choose between
        [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~pipelines.stable_diffusion.ZImagePipelineOutput`] instead of a plain
        tuple.
    joint_attention_kwargs (`dict`, *optional*):
        A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
        `self.processor` in
        [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
    callback_on_step_end (`Callable`, *optional*):
        A function that calls at the end of each denoising steps during the inference. The function is called
        with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
        callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
        `callback_on_step_end_tensor_inputs`.
    callback_on_step_end_tensor_inputs (`List`, *optional*):
        The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
        will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
        `._callback_tensor_inputs` attribute of your pipeline class.
    max_sequence_length (`int`, *optional*, defaults to 512):
        Maximum sequence length to use with the `prompt`.

Examples:

Returns:
    [`~pipelines.z_image.ZImagePipelineOutput`] or `tuple`: [`~pipelines.z_image.ZImagePipelineOutput`] if
    `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
    generated images.
i   r?   r   zHeight must be divisible by z (got z-). Please adjust the height to a multiple of .zWidth must be divisible by z,). Please adjust the width to a multiple of FNr   zWhen `prompt_embeds` is provided without `prompt`, `negative_prompt_embeds` must also be provided for classifier-free guidance.)rL   rN   rM   r9   rO   r#   rP   r   base_image_seq_len   max_image_seq_len   r         ?r   ffffff?g        r   r%   i  )total)dim)r   r8   r9   rO   latent)r   )images)<rA   r'   rj   r   r   r   _cfg_normalization_cfg_truncationrS   rT   listr/   rM   rW   r=   in_channelsr   torchfloat32rp   r~   r    r0   rF   get	sigma_minr4   maxorderr   set_begin_indexfloattolistprogress_barrk   r   expandr   rm   r|   repeat	unsqueezeunbindlinalgvector_normrq   stacksqueezesteplocalspopupdater:   scaling_factorshift_factordecoderH   postprocessmaybe_free_model_hooksr   )=rI   rL   r   r   r"   r%   r   r   r   rN   r   r{   r8   r9   rO   r   r   r   r   r   rP   	vae_scaler#   r   r   perV   npeactual_batch_sizer   r   scheduler_kwargsr$   num_warmup_steps_precomputed_t_normsr   rr   ttimestepcurrent_guidance_scale	apply_cfglatents_typedlatent_model_inputprompt_embeds_model_inputtimestep_model_inputlatent_model_input_listmodel_out_listpos_outneg_out
noise_predjposnegpredori_pos_normnew_pos_normmax_new_normcallback_kwargskcallback_outputsimages=                                                                r   __call__ZImagePipeline.__call__%  s   L 4))A-	I".ykx H==FKqJ  9!-i[ug F<<E;aI 
 ''-'=$"3-*VS"9"9JJvt$<$<VJ]+J $//4J4R c  "" /,0,L,L+'=$7 # &  $//;;&&.. MM	
 !1$*7^-BG\A]ARA]R-M^//4J9O)v9O#Y^_tYuTU#Yu#9O&)v&)>> q)Q.7==3Cq3HI NN!!%%&:C@NN!!%%&94@NN!!%%lC8NN!!%%k48
 $'  ":);NN*
 	*

 *
&	& s9~0CnnFZFZ0ZZ\]^!)n 	&&q)++0D0D0PUZ[_[o[oUptuUu%)IOO,=%=$E#M#M#O #'  %89\!),1>> 88GMM!$45 8Ot3)-)<)<&'3+A.1E1EE14. !<<[AWZ[A[	$+JJt/?/?/E/E$FM)6)=)=aAq)I&0=0V-+3??1+=()0D4D4D4J4J)K&0=-+3(%7%A%A!%D"*./A/H/HQ/H/O*P'!%!1!1+-AC\jo "2 "" ,-?.?@G,->-?@G!#J"#45%aj..0%aj..0"%;sSy%II  22uT=T=T7UX[7[+0<<+C+CC+HL+0<<+C+CD+IL+7%@W@W:X+XL+l:'+|l/J'K"))$/ 6  "'ZQ!?J!&-PAaggi-PVW!XJ'//2
([
 ..--jmmEMM.JAwdi-jklm}}555'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s* I**A9I/IqSTuX\XfXfXlXlNlpqNq '')[ - :` ("E jj0G!?!??488??C_C__GHHOOGO?BE((44U4TE 	##%8O#511K _)vx .Qs :9s8   *`	)`I%`1`C.`>`!``
`()r   r   r   r   r   r   rH   rA   )NTNNNrK   )NNrK   r   )(__name__
__module____qualname____firstlineno__model_cpu_offload_seq_optional_components_callback_tensor_inputsr   r   r   r   r   rC   rT   r   r   r#   rn   FloatTensorr}   rW   rU   r   propertyr   rM   r   r   r   no_gradr   EXAMPLE_DOC_STRINGr   	Generatordictr   r   r   __static_attributes____classcell__)r.   s   @r   r6   r6      s   <(/:]2] ] &	]
 !] .]2 '+,0268<;?#& 5d3i 5 t# 5 &*	 5
 tCy4/ 5 E--.5 5 !& 1 1D 8 5 ! 5J '+8<#&1d3i1 t#1 E--.5	1
 !1 
e	 1x 0 $ $ ( ( , , # #   ]]_12 #'! #%%) #"' #26,-DH,08<AE"' 8<BF9B#&+l2d3il2 d
l2 Tz	l2
 !l2 Ud"l2 l2  l2 l2 tCy4/l2  #Tzl2 ??T%//%::TAl2 ""T)l2 E--.5l2 !%U%6%6 7$ >l2  4Z!l2" #l2$ !%S#X 5%l2& 'Sz4'784?'l2( -1I)l2* !+l2 3 l2r!   r6   )r   r   r   r   )NNNN)(r)   typingr   r   r   transformersr   r   rH   r   loadersr	   r
   models.autoencodersr   models.transformersr   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   utils.torch_utilsr   pipeline_outputr   
get_loggerr   loggerr   r}   r   r    rT   r#   r   r4   r6   r   r!   r   <module>r	     s        7 0 A 0 ; 9 9 7 - 1 
		H	% @ 

 
 	

 
  '+(,"&!%8*t8* %,,%8* Cy4	8*
 K$8*vJ2&(=?R J2r!   