
    
3j              
       L   S SK r S SKrS SKJrJr  S SKrS SKrS SKrS SK	J
r
JrJrJr  SSKJrJr  SSKJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJrJ r   SSK!J"r"  SSK#J$r$  \r%\r&\" SS5      (       a  S SK	J&r&J%r%  \" 5       (       a  S SK'J(s  J)r*  Sr+OSr+\RX                  " \-5      r.Sr/   S%S\0S\1S\1S\14S jjr2    S&S\0S-  S\3\Rh                  -  S-  S\5\0   S-  S\5\1   S-  4S jjr6 S'S\Rn                  S \Rp                  S-  S!\34S" jjr9 " S# S$\5      r:g)(    N)AnyCallable)ByT5TokenizerPreTrainedModelProcessorMixinT5EncoderModel   )MultiPipelineCallbacksPipelineCallback)VaeImageProcessor)AutoencoderKLGlmImageTransformer2DModel)GlmImageKVCache)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableis_transformers_versionloggingreplace_example_docstring)randn_tensor   )GlmImagePipelineOutputz>=z
5.0.0.dev0) GlmImageForConditionalGenerationGlmImageProcessorTFa  
    Examples:
        ```python
        >>> import torch
        >>> from diffusers import GlmImagePipeline

        >>> pipe = GlmImagePipeline.from_pretrained("zai-org/GLM-Image", torch_dtype=torch.bfloat16)
        >>> pipe.to("cuda")

        >>> prompt = "A photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt).images[0]
        >>> image.save("output.png")
        ```
base_seq_len
base_shift	max_shiftreturnc                 "    X-  S-  nXC-  U-   nU$ )Ng      ? )image_seq_lenr   r   r   mmus         j/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/glm_image/pipeline_glm_image.pycalculate_shiftr%   D   s"     
	%#-A	
	#BI    num_inference_stepsdevice	timestepssigmasc                 b   S[        [        R                  " U R                  5      R                  R                  5       5      ;   nS[        [        R                  " U R                  5      R                  R                  5       5      ;   nUbY  UbV  U(       d   U(       d  [        SU R                   S35      eU R                  " SX4US.UD6  U R                  n[        U5      nX14$ UbQ  UcN  U(       d  [        SU R                   S35      eU R                  " SX2S.UD6  U R                  n[        U5      nX14$ UcQ  UbN  U(       d  [        SU R                   S35      eU R                  " SXBS	.UD6  U R                  n[        U5      nX14$ U R                  " U4S
U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
r)   r*   zThe current scheduler class z's `set_timesteps` does not support custom timestep or sigma schedules. Please check whether you are using the correct scheduler.)r)   r*   r(   zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r)   r(   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r*   r(   r(   r    )
setinspect	signatureset_timesteps
parameterskeys
ValueError	__class__r)   len)	schedulerr'   r(   r)   r*   kwargsaccepts_timestepsaccepts_sigmass           r$   retrieve_timestepsr9   P   s   > $s7+<+<Y=T=T+U+`+`+e+e+g'hhW%6%6y7N7N%O%Z%Z%_%_%a!bbN!3 .y/B/B.C Dj k  	\)6\U[\''	!)n, ))+ 
	6> .y/B/B.C Da b  	M)MfM''	!)n )) 
	v1.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r&   encoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr>   r?   moderA   AttributeError)r:   r;   r<   s      r$   retrieve_latentsrE      s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSSr&   c            4          ^  \ rS rSrSr/ rSrSS/rS\S\	S\
S	\S
\S\S\4U 4S jjr\S\4S j5       r\S\R(                  S\S\S\S\R(                  4
S j5       r\S\R(                  S\S\S\R(                  4S j5       r\S\\R4                  R4                     \\\R4                  R4                        -  S\S\\\R4                  R4                        4S j5       r   SJS\\\   -  S\S \S\\\R4                  R4                        S-  S!\R:                  S-  S"\R<                  S-  4S# jjrS$ r     SKS\\\   -  S&\S!\R:                  S-  S'\RB                  S-  4S( jjr"       SLS\\\   -  S+\S,\S\R(                  S-  S-\R(                  S-  S!\R:                  S-  S'\RB                  S-  S&\4S. jjr#SMS/ jr$      SNS0 jr%\&S1 5       r'\&S2 5       r(\&S3 5       r)\&S4 5       r*\&S5 5       r+\&S6 5       r,\RZ                  " 5       \." \/5      SSSSS7SSS8S*SSSSSSSS9S:S)SSS/S%4S\\\   -  S-  S\R(                  \R4                  R4                  -  \0Rb                  -  \\R(                     -  \\R4                  R4                     -  \\0Rb                     -  S-  S\S-  S \S-  S;\S<\\   S-  S=\\2   S-  S>\2S,\S"\R<                  \\R<                     -  S-  S\R(                  S-  S\R(                  S-  S-\R(                  S-  S?\R(                  S-  S@\\R(                     S-  SA\\R(                     S-  SB\3\\4   SC\SD\SE\4\\54   S-  SF\6\\\4/S4   \7-  \8-  S-  SG\\   S&\S\9\3-  40SH jj5       5       r:SIr;U =r<$ )OGlmImagePipeline   a  
Pipeline for text-to-image generation using GLM-Image.

This pipeline integrates both the AR (autoregressive) model for token generation and the DiT (diffusion
transformer) model for image decoding.

Args:
    tokenizer (`PreTrainedTokenizer`):
        Tokenizer for the text encoder.
    processor (`AutoProcessor`):
        Processor for the AR model to handle chat templates and tokenization.
    text_encoder ([`T5EncoderModel`]):
        Frozen text-encoder for glyph embeddings.
    vision_language_encoder ([`GlmImageForConditionalGeneration`]):
        The AR model that generates image tokens from text prompts.
    vae ([`AutoencoderKL`]):
        Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
    transformer ([`GlmImageTransformer2DModel`]):
        A text conditioned transformer to denoise the encoded image latents (DiT).
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
z7vision_language_encoder->text_encoder->transformer->vaerA   prompt_embeds	tokenizer	processortext_encodervision_language_encodervaetransformerr5   c           
        > [         TU ]  5         U R                  UUUUUUUS9  [        U SS 5      (       a/  S[	        U R
                  R                  R                  5      S-
  -  OSU l        [        U R                  S9U l
        [        U S5      (       aX  U R                  bK  [        U R                  R                  S5      (       a&  U R                  R                  R                  U l        g S	U l        g )
N)rJ   rK   rL   rM   rN   rO   r5   rN      r      )vae_scale_factorrO   sample_size   )super__init__register_modulesgetattrr4   rN   configblock_out_channelsrS   r   image_processorrB   rO   rT   default_sample_size)	selfrJ   rK   rL   rM   rN   rO   r5   r3   s	           r$   rW   GlmImagePipeline.__init__   s     	%$;# 	 	
 W^^bdikoVpVpc$((//*L*L&MPQ&Q Rvw0$BWBWX t]++  ,((//?? ##// 	 
  	 r&   is_text_to_imagec                 x   / n/ n[        U R                  S   5       HY  nX   R                  5       u  pVnUR                  [	        Xg-  5      5        UR                  [	        U5      [	        U5      45        M[     U(       d  US   S-   nSn	US   u  pO%[        U5      nUS-   n[        USS  5      n	US   u  pXX4$ )Nr   r   )rangeshapetolistappendintsum)image_grid_thwr`   
grid_sizesgrid_hwithwmax_new_tokenslarge_image_start_offsettarget_grid_htarget_grid_wtotal_tokenss                r$   _compute_generation_params+GlmImagePipeline._compute_generation_params   s    
 
~++A./A$'..0GA!c!%j)NNCFCF+, 0
  '^a/N'($+22;(M=z?L)A-N'*:ab>':$+21:(MUUr&   outputsinput_lengthrq   large_image_tokensr   c                 &    U S   US  nUnXS-   nXEU $ )Nr   r    )rw   rx   rq   ry   generated_tokenslarge_image_startlarge_image_ends          r$   _extract_large_image_tokens,GlmImagePipeline._extract_large_image_tokens   s.     #1:lm44+@/BBr&   	token_idstoken_htoken_wc                     U R                  SSX5      n [        R                  R                  R	                  U R                  5       SSS9R                  [        R                  S9n U R                  SS5      n U $ )Nr   rQ   nearest)scale_factorrC   dtyperb   )viewtorchnn
functionalinterpolatefloattolong)r   r   r   s      r$   _upsample_token_ids$GlmImagePipeline._upsample_token_ids   sn    NN1a:	HH''33IOO4ETU\e3fii** j 
	 NN1b)	r&   image
batch_sizec                    U b  [        U 5      S:X  a  gU S   nUS:X  a]  [        U[        [        45      (       d  [        U 5      /$ [        U 5      S:w  a  [	        S[        U 5       S35      e[        U S   5      /$ [        U[        [        45      (       d"  [	        S[        U5      R                   S35      e[        U 5      U:w  a  [	        S[        U 5       S	U S
35      e[        U S   5      n[        U 5       H2  u  pE[        U5      U:w  d  M  [	        SU SU S[        U5       S35      e   U  Vs/ s H  n[        U5      PM     sn$ s  snf )a  
Validate and normalize image inputs to List[List[PIL.Image]].

Rules:
- batch_size > 1: Only accepts List[List[PIL.Image]], each sublist must have equal length
- batch_size == 1: Accepts List[PIL.Image] for legacy compatibility (converted to [[img1, img2, ...]])
- Other formats raise ValueError

Args:
    image: Input images in various formats
    batch_size: Number of prompts in the batch

Returns:
    Normalized images as List[List[PIL.Image]], or None if no images provided
Nr   r   zOFor batch_size=1 with List[List[PIL.Image]] format, expected 1 image list, got .zJFor batch_size > 1, images must be List[List[PIL.Image]] format. Got List[zA] instead. Each prompt requires its own list of condition images.zNumber of image lists (z) must match batch size (z).zHAll prompts must have the same number of condition images. Prompt 0 has z images, but prompt z has z images.)r4   
isinstancelisttupler2   type__name__	enumerate)r   r   first_elementnum_input_images_per_promptidximgss         r$   _validate_and_normalize_images/GlmImagePipeline._validate_and_normalize_images  s{   ( =CJ!Oa?mdE];;U}$5zQ efijofpeqqrs  qN## -$77 /889 :IJ  u:#6s5zlB[\f[ggijkk '*%(m#"5)IC4y77 $$?#@@TUXTYY^_bcg_h^iiqs  * (--utT
u---s   6ENpromptheightwidthr(   r;   c           
      2	   U=(       d    U R                   n[        U[        5      (       a  U/OUn[        U5      nUSL n	/ n
[	        U5       HV  u  p/ nU	(       d  XK    H  nUR                  SUS.5        M     UR                  SUS.5        U
R                  SUS./5        MX     U R                  R                  U
SUS	:  a  SOS
UUSSS9R                  U5      nUR                  S5      nUR                  S5      nU	(       a  SO[        US   5      nUb  US   R                  5       nOUR                  S   nUSU nU R                  UU	S9u  nnnnSnSnU	(       Gd  / n[        U5       H&  nUU-  nUR                  [        UUU-   5      5        M(     UU   n[        U5      S:  GaN  U R                  R!                  US   U5      R"                  n[$        R&                  " USS9nU R                  R)                  UU5      n UR+                  SS9R-                  5       n![$        R.                  " U U!5      n"/ n#[	        U"5       Hb  u  n$n%UU$   R-                  5       u  n&n'n(U R1                  U%[3        U'5      [3        U(5      5      n)U#R                  U)R5                  S5      5        Md     [$        R&                  " U#SS9nUR7                  5       n*U*SS2S	4   S-  U*SS2S	4'   U*SS2S4   S-  U*SS2S4'   U*nUbX  UR9                  5       n+[$        R:                  " U+5        Ub/  UR<                  S:X  a  [$        R>                  R;                  U+5        U R                  R@                  " S0 UDUSS.D6n,/ n-US   R                  S   n.[        U5       HC  nU RC                  U,XS	-    U.UUU-  5      n/U R1                  U/UU5      n0U-R                  U05        ME     [$        R&                  " U-SS9n0Sn1Sn2Ub  Ub  [E        [$        R.                  " UU5      5      n2UR+                  SS9R-                  5       n3/ n4[        U5       H*  n$U$U-  n5U5U-   n6U4R                  [G        U3U5U6 5      5        M,     [E        [$        R.                  " UU45      5      n1U0U1U24$ )a  
Generate prior tokens for the DiT model using the AR model.

Args:
    prompt: Single prompt or list of prompts
    height: Target image height
    width: Target image width
    image: Normalized image input as List[List[PIL.Image]]. Should be pre-validated
           using _validate_and_normalize_images() before calling this method.
    device: Target device
    generator: Random generator for reproducibility

Returns:
    Tuple of:
        - prior_token_ids: Tensor of shape (batch_size, num_tokens) with upsampled prior tokens
        - prior_token_image_ids_per_sample: List of tensors, one per sample. Each tensor contains
            the upsampled prior token ids for all condition images in that sample. None for t2i.
        - source_image_grid_thw_per_sample: List of tensors, one per sample. Each tensor has shape
            (num_condition_images, 3) with upsampled grid info. None for t2i.
Nr   )r   r   text)r   r   user)rolecontentTr   Fpt)tokenizepaddingtarget_htarget_wreturn_dictreturn_tensorsri   images_per_sampler   )ri   r`   pixel_valuesdimrb   rQ   cuda)rp   	do_sample	input_idsr    )$_execution_devicer   strr4   r   rf   rK   apply_chat_templater   getitemrd   ru   rc   extendrM   get_image_featurespooler_outputr   catget_image_tokensprodre   splitr   rg   squeezecloneinitial_seedmanual_seedr   r   generater~   r   rh   )7r^   r   r   r   r   r(   r;   prompt_listr   r`   all_messagesr   pr   imginputsri   r   num_condition_imagesnum_grids_per_samplefirst_sample_gridsrp   large_image_offsetr   r   prior_token_image_idssource_image_grid_thwsource_indices
sample_idxbasesource_gridsprior_token_image_embedprior_token_image_ids_d32split_sizesprior_ids_per_sourceupsampled_prior_idsrl   	prior_idsrm   rn   ro   	upsampledupsampled_gridsseedrw   all_prior_token_idsmax_input_lengthprior_token_ids_d32prior_token_ids prior_token_image_ids_per_sample source_image_grid_thw_per_sampletokens_per_imagetokens_per_sample	start_idxend_idxs7                                                          r$   generate_prior_tokens&GlmImagePipeline.generate_prior_tokensA  s   : 1411 #-VS"9"9vhv%
 !D=,FCG# :CNNGc#BC &NNFA67&W!E FG - 33&ND 4 
 "V* 	  $45"JJ':; %5q#eAh-(#4Q#7#<#<#>  $2#7#7#:  ,,A-AB?C?^?^-@P @_ @
<*GW
 !% $  N#J/
!$88%%eD$9M2M&NO 0 *.9L< 1$*.*F*F*Y*Y>*L+- ( +0))4KQR*S',0,H,H,Y,Y+\-) +//B/7>>@',{{3Lk'Z$&(#$-.B$CLAy*1o446GAq! $ 8 8CFCPQF SI'..y/@/@/CD %D ).		2E1(M%"."4"4"6(71(=(A1%(71(=(A1%(7%
  ))+Dd#!fkkV&;

&&t,..77 

)
 !!+.44R8$C #'"B"BAg&(8:LgX_N_# #667JGU\]O&&7 %  ))$7Q? ,0(+/( ,1F1R/3EKK@UWk4l/m,499b9AHHJ ":& 44	#&::!((-=i-P)QR ' 04EKK@UWh4i/j, @Bbbbr&   c                 ,   [        U[        5      (       a  U/n/ nU Hs  n[        R                  " SU5      [        R                  " SU5      -   [        R                  " SU5      -   [        R                  " SU5      -   nUR	                  U5        Mu     U$ )zQExtract glyph texts from prompt(s). Returns a list of lists for batch processing.z	'([^']*)'z\u201c([^\u201c\u201d]*)\u201dz	"([^"]*)"u   「([^「」]*)」)r   r   refindallrf   )r^   r   all_ocr_textsr   	ocr_textss        r$   get_glyph_texts GlmImagePipeline.get_glyph_texts  s    fc""XFA

<+**>BC**\1-. **2A67    +  r&      max_sequence_lengthr   c                    U=(       d    U R                   nU=(       d    U R                  R                  nU R                  U5      n/ nU GH\  n[	        U5      S:X  a  S/nU R                  UUSS9R                  nU V	s/ s H.  oR
                  R                  /[	        U5      S-   S-  -  U	-   PM0     nn	[        S U 5       5      n
[        R                  " U V	s/ s H%  n	S/[	        U	5      -  S/U
[	        U	5      -
  -  -   PM'     sn	US9n[        R                  " U V	s/ s H+  n	XR
                  R                  /U
[	        U	5      -
  -  -   PM-     sn	US9nU R                  XS	9nUR                  UR                  5          R                  S5      nUR                  U5        GM_     [        S
 U 5       5      n/ nU H  nUR                  S5      U:  ai  [        R                   " UR                  S5      UUR                  S5      -
  UR                  S5      UUR                  S9n[        R"                  " UU/SS9nUR                  U5        M     [        R"                  " USS9nUR%                  X4S9$ s  sn	f s  sn	f s  sn	f )z2Get glyph embeddings for each prompt in the batch.r    T)
max_length
truncationr   rQ   c              3   8   #    U  H  n[        U5      v   M     g 7fN)r4   ).0
input_ids_s     r$   	<genexpr>5GlmImagePipeline._get_glyph_embeds.<locals>.<genexpr>  s     IyS__ys   r(   )attention_maskc              3   B   #    U  H  oR                  S 5      v   M     g7f)r   N)size)r   embs     r$   r   r     s     B1A#((1++1As   r(   r   r   )r   rL   r   r   r4   rJ   r   pad_token_idmaxr   tensorlast_hidden_statebool	unsqueezerf   r  zerosr   r   )r^   r   r   r(   r   all_glyph_textsall_glyph_embedsglyph_textsr   r   r   r  rw   glyph_embedsmax_seq_lenpadded_embedsr  pads                     r$   _get_glyph_embeds"GlmImagePipeline._get_glyph_embeds  s    14110**00 ..v6*K;1$!d. '  i	  ktjs\f,,-#i.12D1IJZWjs   IyIIJ"\\`ij`iR\!s:&!
S_0L)MM`ijN  '0&/
 .."="=!>*sS]B^!__&/ I ''	'QG"44^5H5H5JKUUVWXL##L13 +8 B1ABB#Cxx{[(kk#((1+{SXXa[/H#((ST+^dloluluviic
2  %	 $ yyA6f::;
 ks   ?5I;,J 
"2J
Tr   do_classifier_free_guidancenum_images_per_promptnegative_prompt_embedsc	                    U=(       d    U R                   n[        U[        5      (       a  U/OUnUb  [        U5      n	OUR                  S   n	Uc  U R                  XXg5      nUS:  a  UR                  USS9nU(       aI  UcF  Sn
[        U
[        5      (       a  X/-  OU
n
U R                  XXg5      nUS:  a  UR                  USS9nXE4$ )a  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `list[str]`, *optional*):
        prompt to be encoded
    do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
        Whether to use classifier free guidance or not.
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        Number of images that should be generated per prompt. torch device to place the resulting embeddings on
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    device: (`torch.device`, *optional*):
        torch device
    dtype: (`torch.dtype`, *optional*):
        torch dtype
    max_sequence_length (`int`, defaults to `2048`):
        Maximum sequence length in encoded prompt. Can be set to other values but may lead to poorer results.
r   r   r   r   )r   r   r   r4   rd   r  repeat_interleave)r^   r   r  r  rI   r  r(   r   r   r   negative_prompts              r$   encode_promptGlmImagePipeline.encode_prompt!  s    > 1411'44&&VJ&,,Q/J  226PV^M !1$);;<QWX;YM '+A+I O@J?\_@`@`j+<<fuO%)%;%;Obh%p"$q()?)Q)QRgmn)Q)o&44r&   c	                 $   Ub  UR                  U5      $ UU[        U5      U R                  -  [        U5      U R                  -  4n	[        U[        5      (       a*  [        U5      U:w  a  [        S[        U5       SU S35      e[        XXeS9nU$ )Nz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r;   r(   r   )r   rg   rS   r   r   r4   r2   r   )
r^   r   num_channels_latentsr   r   r   r(   r;   rA   rd   s
             r$   prepare_latents GlmImagePipeline.prepare_latentsZ  s    ::f%%  K4000J$///	
 i&&3y>Z+GA#i.AQ R&<'gi  u&Vr&   c           
      Z  ^  Ub7  UT R                   T R                  R                  R                  -  S-  -  S:w  d-  UbL  UT R                  R                  R                  S-  -  S:w  a"  [	        ST R                   S-   SU SU S35      eUbW  [        U 4S jU 5       5      (       d=  [	        S	T R                   S
U Vs/ s H  oT R                  ;  d  M  UPM     sn 35      eUb  Ub  [	        SU SU S35      eUc  Uc  [	        S5      eUbA  [        U[        5      (       d,  [        U[        5      (       d  [	        S[        U5       35      eUc  Uc  [	        S5      eUbC  Ub@  UR                  UR                  :w  a&  [	        SUR                   SUR                   S35      eX/n[        S U 5       5      nUS:  a%  U[        U5      :  a  [	        SUS L SU	S L S35      eUS:  a  Uc  [	        S5      eUS:  a  U
c  [	        S5      eUb  Uc  Uc  [	        S5      eg g g s  snf )NrQ   r   z-`height` and `width` have to be divisible by    z	 but are z and r   c              3   @   >#    U  H  oTR                   ;   v   M     g 7fr   )_callback_tensor_inputs)r   kr^   s     r$   r   0GlmImagePipeline.check_inputs.<locals>.<genexpr>  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is ziProvide either `prompt` or `prior_token_ids`. Cannot leave both `prompt` and `prior_token_ids` undefined.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` c              3   (   #    U  H  oS Lv   M
     g 7fr   r    )r   xs     r$   r   r'    s     $O<Nqd]<Ns   zv`prior_token_image_ids` and `source_image_grid_thw` must be provided together for i2i mode. Got prior_token_image_ids=z, source_image_grid_thw=zi`prior_token_ids` must be provided when `prior_token_image_ids` and `source_image_grid_thw` are provided.z`image` must be provided when `prior_token_image_ids` and `source_image_grid_thw` are provided for i2i mode, as the images are needed for VAE encoding to build the KV cache.zI`prompt_embeds` or `prompt` must also be provided with `prior_token_ids`.)rS   rO   rZ   
patch_sizer2   allr%  r   r   r   r   rd   rh   r4   )r^   r   r   r   "callback_on_step_end_tensor_inputsrI   r  r   r   r   r   r&  prior_image_inputsnum_prior_image_inputss   `             r$   check_inputsGlmImagePipeline.check_inputsl  s    $//$2B2B2I2I2T2TTWXXY]^^ ))00;;a?@AE ?@U@UXY@Y?ZZcdjckkpqvpwwxy  .9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa>o5{  $)?)K""&<&B&BB --:-@-@,A B.445Q8  4K!$$O<N$O!O!A%*@3GYCZ*Z--B$-N,O P))>d)J(K1N 
 "A%/*A{  "A%%-a 
 &=+@V^hii FT+@&Y pHs   ?H(H(c                     U R                   $ r   _guidance_scaler^   s    r$   guidance_scaleGlmImagePipeline.guidance_scale  s    ###r&   c                      U R                   S:  $ )Nr   r2  r4  s    r$   r  ,GlmImagePipeline.do_classifier_free_guidance  s    ##a''r&   c                     U R                   $ r   )_num_timestepsr4  s    r$   num_timestepsGlmImagePipeline.num_timesteps  s    """r&   c                     U R                   $ r   )_attention_kwargsr4  s    r$   attention_kwargs!GlmImagePipeline.attention_kwargs      %%%r&   c                     U R                   $ r   )_current_timestepr4  s    r$   current_timestep!GlmImagePipeline.current_timestep  rA  r&   c                     U R                   $ r   )
_interruptr4  s    r$   	interruptGlmImagePipeline.interrupt  s    r&   2   g      ?)r   r   pilr'   r)   r*   r5  r   r   r   crops_coords_top_leftoutput_typer   r?  callback_on_step_endr,  c                    [        U[        [        45      (       a  UR                  nU R	                  UUUUUUUUUU5
        Xl        UU l        SU l        SU l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                  UU5      n[        U
[        5      (       a  U
S   OU
nUc  U R                  UUUUUUS9u  nnnOUnUnSnUb  / nU H  n/ n U H  n![        U![         R"                  R"                  5      (       a  U!R$                  SSS2   OU!R                  SS u  n"n#U R&                  U R(                  R*                  R,                  -  n$U"U$-  U$-  n"U#U$-  U$-  n#U R.                  R1                  U!U"U#S9n!U R3                  U!5        U=(       d    U"nU=(       d    U#nM     UR3                  U 5        M     U R5                  UU R6                  U	UUUUU R8                  S	9u  pU R(                  R*                  R:                  n%U R=                  UU	-  U%UUUR8                  UU
US
9n[?        U R(                  R*                  R@                  S9n&UGbn  U&RC                  S5        [D        RF                  " U RH                  R*                  RJ                  5      RM                  SU RH                  R*                  RN                  SS5      n'[D        RF                  " U RH                  R*                  RP                  5      RM                  SU RH                  R*                  RN                  SS5      n(U'RS                  UUR8                  S9n'U(RS                  UUR8                  S9n([U        U5       GHQ  n)UU)   nUU)   n*UU)   n+U+RW                  SS9RY                  5       n,[D        RZ                  " U*U,5      n-[]        UU-5       H  u  n.n/U.RS                  UUR8                  S9n.[_        U RH                  Ra                  U.5      U
SS9n0U0U'-
  U(-  n0U R)                  U0[D        Rb                  " U5      SS2SS2S4   U/[D        Rd                  " U/S[D        Rf                  S9[D        Rh                  " SUS9[D        RF                  " U.R                  SS /US9[D        Rh                  " SUS9UU&S9	n1M     U&Rk                  5         GMT     X44n2[D        RF                  " U2/UR8                  US9n2[D        RF                  " U/UR8                  US9nU2Rm                  UU	-  S5      n2URm                  UU	-  S5      nX0R&                  -  X@R&                  -  -  U R(                  R*                  R,                  S-  -  n3Uc<  [n        Rp                  " U Rr                  R*                  Rt                  SUS-   5      SS O[n        Rv                  " U5      nURy                  [n        Rz                  5      Ry                  [n        R|                  5      nUc"  X`Rr                  R*                  Rt                  -  OUn[        U3U Rr                  R*                  R                  SS5      U Rr                  R*                  R                  SS5      U Rr                  R*                  R                  SS5      5      n4[        U Rr                  UUXgU4S 9u  pe[        U5      U lB        U R(                  R8                  n5[        [        U5      XPRr                  R                  -  -
  S5      n6U	S:  a  UR                  U	SS9n[D        Rd                  " US[D        Rf                  S9n7[D        Rd                  " US![D        Rf                  S9n8U R                  US"9 n9[        U5       GH  u  n:n;U R                  (       a  M  U;U l        URS                  U55      n<U;R                  UR                  S   5      S-
  n=Ub  U&RC                  S#5        U R)                  U<UUU7U=U2UUSU&S$9
S   R                  5       n>U R6                  (       aS  Ub  U&RC                  S%5        U R)                  U<UUU8U=U2UUSU&S$9
S   R                  5       n?U?U R                  U>U?-
  -  -   n@OU>n@U Rr                  R                  W@U;USS&9S   nUba  0 nAU H  nB[        5       UB   WAUB'   M     U" U U:U Rr                  R                  U:   WA5      nCUCR                  S'U5      nUCR                  S(U5      nU:[        U5      S-
  :X  d)  U:S-   U6:  a0  U:S-   U Rr                  R                  -  S:X  a  U9R                  5         [        (       d  GM  [        R                  " 5         GM     SSS5        SU l        U&R                  5         US):X  Gdv  URS                  U RH                  R8                  5      n[D        RF                  " U RH                  R*                  RJ                  5      RM                  SU RH                  R*                  RN                  SS5      RS                  UR                  UR8                  5      n'[D        RF                  " U RH                  R*                  RP                  5      RM                  SU RH                  R*                  RN                  SS5      RS                  UR                  UR8                  5      n(UU(-  U'-   nU RH                  R                  USU
S*9S   nU R.                  R                  UUS+9nOUnU R                  5         U(       d  U4$ [        US,9$ ! , (       d  f       GN= f)-a  
Function invoked when calling the pipeline for generation.

Args:
    prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide the image generation. Must contain shape info in the format '<sop>H
        W<eop>' where H and W are token dimensions (d32). Example: "A beautiful sunset<sop>36 24<eop>"
        generates a 1152x768 image.
    image: Optional condition images for image-to-image generation.
    height (`int`, *optional*):
        The height in pixels. If not provided, derived from prompt shape info.
    width (`int`, *optional*):
        The width in pixels. If not provided, derived from prompt shape info.
    num_inference_steps (`int`, *optional*, defaults to `50`):
        The number of denoising steps for DiT.
    guidance_scale (`float`, *optional*, defaults to `1.5`):
        Guidance scale for classifier-free guidance.
    num_images_per_prompt (`int`, *optional*, defaults to `1`):
        The number of images to generate per prompt.
    generator (`torch.Generator`, *optional*):
        Random generator for reproducibility.
    output_type (`str`, *optional*, defaults to `"pil"`):
        Output format: "pil", "np", or "latent".

Examples:

Returns:
    [`GlmImagePipelineOutput`] or `tuple`: Generated images.
NFr   r   )r   r   r   r   r(   r;   rb   rQ   )r   r   )r  rI   r  r   r(   r   )r   r  r   r   r   r(   r;   rA   )
num_layerswriter  r   r@   )r;   r<   .r   )r   r   )r   rQ   )	hidden_statesencoder_hidden_statesprior_token_idprior_token_droptimesteptarget_sizecrop_coordsr?  	kv_caches)r   r(   g      ?base_image_seq_len   r         ?r         ?)r#   T)totalread)
rS  rT  rU  rV  rW  rX  rY  r?  r   rZ  skip)r   rA   rI   latent)r   r;   )rM  )images)Zr   r   r
   tensor_inputsr/  r3  r>  rC  rG  r   r   r4   rd   r   r   r   PILImager  rS   rO   rZ   r*  r\   
preprocessrf   r  r  r   in_channelsr   r   rP  set_moder   r  rN   latents_meanr   latent_channelslatents_stdr   rc   r   re   r   ziprE   encode
zeros_like	full_liker
  r  next_samplerepeatnplinspacer5   num_train_timestepsarrayastypeint64float32r%   r   r9   r:  r  orderr  progress_barr   rH  expandr   r5  steplocalsr*   popupdateXLA_AVAILABLExm	mark_stepclearr(   decodepostprocessmaybe_free_model_hooksr   )Dr^   r   r   r   r   r'   r)   r*   r5  r  r;   rA   rI   r  r   r   r   rL  rM  r   r?  rN  r,  r   r   r(   normalized_imagear_generatorr   r   preprocessed_imagesprompt_imagesprompt_preprocessedr   image_heightimage_widthmultiple_ofrk  rZ  rj  rl  
prompt_idxprompt_prior_idsprompt_grid_thwr   prior_ids_per_imagecondition_imagecondition_image_prior_token_idcondition_latent_rX  r!   r#   transformer_dtypenum_warmup_stepsprior_token_drop_condprior_token_drop_uncondr{  rl   rm   latent_model_inputrW  noise_pred_condnoise_pred_uncond
noise_predcallback_kwargsr&  callback_outputssD                                                                       r$   __call__GlmImagePipeline.__call__  s
   F *-=?U,VWW1E1S1S. 	."!!	
  .!1!%*VS"9"9JJvt$<$<VJ&,,Q/J''  >>ujQ (2)T'B'By|	"**!*!!* +  `O=?_ 0E,/D, #'"$!1&(#(CBLSRUR[R[RaRaBbBb2hkhqhqrtsthu-L+"&"7"7$:J:J:Q:Q:\:\"\K$0K$?;#NL#.+#="LK..99#lZe9fC'..s3#3|F!0[E ) $**+>? "2 150B0B,,"7'#9 3** 1C 	1
- **11==&&!$99!0%% ' 	
 $t/?/?/F/F/Q/QR	'w' <<(D(DEJJ1dhhooNmNmoprstL,,txx'B'BCHHDHHOOLkLkmnpqrK'??&@S@S?TL%..m>Q>Q.RK $J/
 3J ?#CJ#O "B:"N .22r2:AAC&+kk2BK&P#GJ=ZmGnCO%C&5&8&8mNaNa&8&bO'78I[c($ )9<(G;'V$((&6.3.>.>}.MbqbRTSTRTVYk.Z'E).9WY^fkfpfp)q!&T&!A$)LL/2G2G2L1MV\$]$)KKv$F)9"+ ) 
A Ho& %%'9 0> ollK=8K8KTZ[ %.C-DML_L_hn o!((6K)KQO 5 < <ZJ_=_ab c !$9$99eG\G\>\]##..1

   KK--AA3H[^_H_`adbde)$ 	
 $$RXX.55bjjA	JP.^^22FFF^dNN!!%%&:C@NN!!%%lD9NN!!%%k48	
 *<NN/r*
&	 ")n !,,22s9~0CnnFZFZ0ZZ\]^ !1$-??@U[\?]O %ejj Y"'///4uzz"Z%89\!),1>>)*&%,ZZ0A%B"88GMM!$4593?&&v."&"2"2"4*7#2%:% + 5%5 %' #3 # # UW   337C!**62(,(8(8&8.D'6)@!)$/$9)9$)"+ )9 ) )  & "3T5H5HO^oLo5p!pJ!0J..--j!WRW-XYZ['3&(O?-3Xa[* @';D!T^^EZEZ[\E]_n'o$.229gFG$4$8$8-$XMI**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNy - :~ "&h&jj0GTXX__99:a88!Q?GNNGMM2  TXX__889a88!Q?GNNGMM2 
 +l:GHHOOG)OTUVWE((44U4TEE 	##%8O%U33u :9s   Gm:m::
n	)r>  rC  r3  rG  r:  r]   r\   rS   )NNN)Nr   NN)Tr   NNNNr   r   )NNNNNN)=r   
__module____qualname____firstlineno____doc___optional_componentsmodel_cpu_offload_seqr%  r   r   r   r   r   r   r   rW   staticmethodr
  ru   r   Tensorrg   r~   r   r   re  rf  r   r   r(   	Generatorr   r   r   r  r  r   r/  propertyr5  r  r;  r?  rD  rH  no_gradr   EXAMPLE_DOC_STRINGrs  ndarrayr   r   dictr   r   r   r
   r   r  __static_attributes____classcell__)r3   s   @r$   rG   rG      s;   . U(/:
 
 %
 %	

 "B
 
 0
 3
@ VV V. CC-0CLOCehC	C C u|| c C TYT`T`   7.CIIOO$tD,A'BB7.7. 
d399??#	$7. 7.| 59&*,0Ycd3iYc Yc 	Yc
 D)*T1Yc t#Yc ??T)Ycv" #'#'&*$(4;d3i4; !4; t#	4;
 {{T!4;r -1%&-16:&*$(#'75d3i75 &*75  #	75
 ||d*75 !&t 375 t#75 {{T!75 !75r0 #""IjV $ $ ( ( # # & & & &   ]]_12 *. ! #%&*%) #%&DH'+-16:/3;?;?17  26 9B#'CH4d3i$&H4 ||
))//
** u||
 syy
	 
 rzz
 H4 d
H4 TzH4 !H4 9t#H4 Ud"H4 H4   #!H4" ??T%//%::TA#H4$ $%H4& ||d*'H4( !&t 3)H4* ,+H4,  $ELL1D8-H4.  $ELL1D8/H40  %S#X1H42 3H44 5H46 sCx.4/7H48 'S$'7'=>

 ! 9H4@ -1IAH4B !CH4D 
 %	'EH4 3 H4r&   rG   )r\  r]  r^  )NNNN)Nr?   );r-   r   typingr   r   numpyrs  re  r   transformersr   r   r   r   	callbacksr
   r   r\   r   modelsr   r   )models.transformers.transformer_glm_imager   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   r   r   utils.torch_utilsr   pipeline_outputr   r   r   torch_xla.core.xla_modelcore	xla_modelr  r  
get_loggerr   loggerr  rg   r   r%   r   r(   r   r9   r  r  rE   rG   r    r&   r$   <module>r     sk     	    
  W W A 0 ? H 9 9 h h - 3 # #2  4..P ))MM			H	% $ 	  	
  '+(,"&!%@*t@* %,,%@* Cy4	@*
 K$@*J `h
TLL
T-2__t-C
TY\
Tx4( x4r&   