
    
3j>                         S r SSKrSSKJrJrJrJr  SSKrSSKJ	r	  SSK
JrJrJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr   " S S\\5      rg)z1
Ernie-Image Pipeline for HuggingFace Diffusers.
    N)CallableListOptionalUnion)Image)	AutoModelAutoModelForCausalLMAutoTokenizer   )ErnieImageLoraLoaderMixin)AutoencoderKLFlux2)ErnieImageTransformer2DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)randn_tensor   )ErnieImagePipelineOutputc            "         ^  \ rS rSrSrSrSS/rS/r  S5S\S	\	S
\
S\S\S\\   S\\   4U 4S jjjr\S 5       r\S 5       r\R(                  " 5            S6S\S\R,                  S\S\S\\   S\S\S\4S jj5       r\R(                  " 5        S7S\\\\   4   S\R,                  S\S\\R8                     4S jj5       r\S\R8                  S\R8                  4S j5       r\S\R8                  S\R8                  4S j5       r \S\\R8                     S\R,                  S \RB                  S!\4S" j5       r"\R(                  " 5       SS#SSS$S%SSSSSS&S'SS/S'4S\\\\\   4      S(\\\\\   4      S\S\S)\S*\S\S+\\RF                     S\\R8                     S,\$\RJ                     S-  S-\$\RJ                     S-  S.\S/\&S0\\'\\\(/S4      S1\\   S2\&4 S3 jj5       r)S4r*U =r+$ )8ErnieImagePipeline#   a  
Pipeline for text-to-image generation using ErnieImageTransformer2DModel.

This pipeline uses:
- A custom DiT transformer model
- A Flux2-style VAE for encoding/decoding latents
- A text encoder (e.g., Qwen) for text conditioning
- Flow Matching Euler Discrete Scheduler
z"pe->text_encoder->transformer->vaepepe_tokenizerlatentsNtransformervaetext_encoder	tokenizer	schedulerc           
         > [         TU ]  5         U R                  UUUUUUUS9  [        U SS 5      (       a2  S[	        U R
                  R                  R                  5      -  U l        g SU l        g )N)r   r   r   r   r   r   r   r         )	super__init__register_modulesgetattrlenr   configblock_out_channelsvae_scale_factor)	selfr   r   r   r   r   r   r   	__class__s	           n/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/ernie_image/pipeline_ernie_image.pyr#   ErnieImagePipeline.__init__3   su     	#%% 	 	
 SZZ^`egkRlRlc$((//*L*L&M Nrt    c                     U R                   $ )N_guidance_scaler*   s    r,   guidance_scale!ErnieImagePipeline.guidance_scaleI   s    ###r.   c                      U R                   S:  $ )N      ?r0   r2   s    r,   do_classifier_free_guidance.ErnieImagePipeline.do_classifier_free_guidanceM   s    ##c))r.      promptdevicewidthheightsystem_prompttemperaturetop_preturnc                 `   [         R                  " XUS.SS9n/ n	Ub  U	R                  SUS.5        U	R                  SUS.5        U R                  R	                  U	SSS9n
U R                  U
S	S
9R                  U5      nU R                  R                  " S0 UDU R                  R                  US:g  =(       d    US:g  UUU R                  R                  U R                  R                  S.D6nUS   US   R                  S   S nU R                  R                  USS9R                  5       $ )zAUse PE model to rewrite/enhance a short prompt via chat_template.)r:   r<   r=   F)ensure_asciiNsystem)rolecontentuser)tokenizeadd_generation_promptpt)return_tensorsr6   )max_new_tokens	do_sampler?   r@   pad_token_ideos_token_idr   	input_idsr   T)skip_special_tokens )jsondumpsappendr   apply_chat_templatetor   generatemodel_max_lengthrN   rO   shapedecodestrip)r*   r:   r;   r<   r=   r>   r?   r@   user_contentmessages
input_textinputs
output_idsgenerated_idss                 r,   _enhance_prompt_with_pe*ErnieImagePipeline._enhance_prompt_with_peQ   sA    zz@
 $OOX-HILAB &&::"' ; 


 "":d"CFFvNWW%% 

,,==!S(8ESL#**77**77

 #1f[&9&?&?&B&DE  ''4'PVVXXr.   r   num_images_per_promptc                    [        U[        5      (       a  U/n/ nU H  nU R                  USSSS9S   n[        U5      S:X  a2  U R                  R                  b  U R                  R                  /nOS/n[
        R                  " U/US9n[
        R                  " 5          U R                  USS9nUR                  S	   S   n	SSS5        [        U5       H  n
UR                  W	5        M     M     U$ ! , (       d  f       N7= f)
z"Encode text prompts to embeddings.TF)add_special_tokens
truncationpaddingrP   r   N)r;   )rP   output_hidden_states)
isinstancestrr   r&   bos_token_idtorchtensorno_gradr   hidden_statesrangerU   )r*   r:   r;   re   text_hiddenspidsrP   outputshidden_s              r,   encode_prompt ErnieImagePipeline.encode_prompt{   s    fc""XFA..#'	 ! 
 C 3x1}>>..:>>667C#CcU6:I++')- , 
 !..r215 ! 01##F+ 21 6  !s   !#C55
D	c                     U R                   u  pp4U R                  XUS-  SUS-  S5      n U R                  SSSSSS5      n U R                  XS-  US-  US-  5      $ )z12x2 patchify: [B, 32, H, W] -> [B, 128, H/2, W/2]r    r   r   r         )rZ   viewpermutereshaper   bchws        r,   _patchify_latents$ErnieImagePipeline._patchify_latents   sf     ]]
a,,qQ!VQQ://!Q1a3qa%aa88r.   c                     U R                   u  pp4U R                  XS-  SSX45      n U R                  SSSSSS5      n U R                  XS-  US-  US-  5      $ )z5Reverse patchify: [B, 128, H/2, W/2] -> [B, 32, H, W]r~   r    r   r   r}   r   )rZ   r   r   r   s        r,   _unpatchify_latents&ErnieImagePipeline._unpatchify_latents   s`     ]]
a//!!VQ18//!Q1a3qq&!a%Q77r.   rt   dtypetext_in_dimc                    [        U 5      nUS:X  a<  [        R                  " SSU4XS9[        R                  " SU[        R                  S94$ U  Vs/ s He  oUR	                  5       S:X  a/  UR                  S5      R                  U5      R                  U5      OUR                  U5      R                  U5      PMg     nn[        R                  " U Vs/ s H  owR                  S   PM     snU[        R                  S9n[        UR                  5       R                  5       5      n	[        R                  " XIU4XS9n
[        U5       H  u  pXzUS UR                  S   2S S 24'   M     X4$ s  snf s  snf )Nr   r;   r   )r   r   r   )r&   ro   zeroslongdimsqueezerW   rp   rZ   intmaxitem	enumerate)rt   r;   r   r   Bth
normalizedtlensTmaxtext_bthis               r,   	_pad_textErnieImagePipeline._pad_text   sI   6;;1k26OQVQ\Q\V5::R   ht
gsac&&(a-BJJqMV$''.RUU6]EUEUV[E\\gs 	 
 ||<AWWQZ<VSXS]S]^488:??$%;;5fRj)DA+,Q!''!*a'( *~
 =s   A,E.E3 2   g      @pilTnegative_promptnum_inference_stepsr3   	generatorprompt_embedsnegative_prompt_embedsoutput_typereturn_dictcallback_on_step_end"callback_on_step_end_tensor_inputsuse_pec           
         U R                   nU R                  R                  nX`l        Uc  U
c  [	        S5      eUb  U
b  [	        S5      eX0R
                  -  S:w  d  X@R
                  -  S:w  a  [	        SU R
                   35      eUb  [        U[        5      (       a  U/nSnUbM  U(       aF  U R                  b9  U R                  b,  U Vs/ s H  nU R                  UUXCS9PM     nn[        U5      nUb  [        U5      nO[        U
5      nUU-  nUc  Sn[        U[        5      (       a  U/U-  n[        U5      U:w  a  [	        SU S	35      eU
b(  U
 VVs/ s H  n[        U5        H  nUPM     M     nnnOU R                  UUU5      nU R                  (       a>  Ub(  U VVs/ s H  n[        U5        H  nUPM     M     nnnOU R                  UUU5      nX0R
                  -  nX@R
                  -  nU R                  R                   R"                  nU	c  [%        UUUU4UUUS
9n	[&        R(                  " SSUS-   5      nU R*                  R-                  USS US9  U R                  (       a  [        W5      [        U5      -   nOUnU R/                  UUUU R                  R                   R0                  S9u  n n!U R3                  US9 n"[5        U R*                  R6                  5       GH7  u  n#n$U R                  (       a@  [&        R8                  " X/SS9n%[&        R:                  " US-  4U$R=                  5       UUS9n&O(U	n%[&        R:                  " U4U$R=                  5       UUS9n&U R                  U%U&U U!SS9S   n'U R                  (       a  U'R?                  SSS9u  n(n)U(UU)U(-
  -  -   n'U R*                  RA                  U'U$U	5      RB                  n	Ub8  0 n*U H  n+[E        5       U+   U*U+'   M     U" U U#U$U*5      n,U,RG                  SU	5      n	U"RI                  5         GM:     SSS5        US:X  a  U	$ U RJ                  RL                  RN                  RQ                  SSSS5      RS                  U5      n-[&        RT                  " U RJ                  RL                  RV                  RQ                  SSSS5      S-   5      RS                  U5      n.U	U.-  U--   n	U RY                  U	5      n	U RJ                  R[                  U	SS9S   n/U/R]                  SS5      S-   S-  n/U/R_                  5       Ra                  SSSS5      Rc                  5       Re                  5       n/US:X  a8  U/ V0s/ s H+  n0[f        Rh                  " U0S-  Rk                  S5      5      PM-     n/n0U Rm                  5         U(       d  U/4$ [o        U/US9$ s  snf s  snnf s  snnf ! , (       d  f       GN= fs  sn0f ) al  
Generate images from text prompts.

Args:
    prompt: Text prompt(s)
    negative_prompt: Negative prompt(s) for CFG. Default is "".
    height: Image height in pixels (must be divisible by 16). Default: 1024.
    width: Image width in pixels (must be divisible by 16). Default: 1024.
    num_inference_steps: Number of denoising steps
    guidance_scale: CFG scale (1.0 = no guidance). Default: 4.0.
    num_images_per_prompt: Number of images per prompt
    generator: Random generator for reproducibility
    latents: Pre-generated latents (optional)
    prompt_embeds: Pre-computed text embeddings for positive prompts (optional).
        If provided, `encode_prompt` is skipped for positive prompts.
    negative_prompt_embeds: Pre-computed text embeddings for negative prompts (optional).
        If provided, `encode_prompt` is skipped for negative prompts.
    output_type: "pil" or "latent"
    return_dict: Whether to return a dataclass
    callback_on_step_end: Optional callback invoked at the end of each denoising step.
        Called as `callback_on_step_end(pipeline, step, timestep, callback_kwargs)` where `callback_kwargs`
        contains the tensors listed in `callback_on_step_end_tensor_inputs`. The callback may return a dict to
        override those tensors for subsequent steps.
    callback_on_step_end_tensor_inputs: List of tensor names passed into the callback kwargs.
        Must be a subset of `_callback_tensor_inputs` (default: `["latents"]`).
    use_pe: Whether to use the PE model to enhance prompts before generation.

Returns:
    :class:`ErnieImagePipelineOutput` with `images` and `revised_prompts`.
Nz0Must provide either `prompt` or `prompt_embeds`.zBCannot provide both `prompt` and `prompt_embeds` at the same time.r   z&Height and width must be divisible by )r<   r=   r   z1negative_prompt must have same length as prompt ())r   r;   r   r6   g        r   )sigmasr;   )rt   r;   r   r   )total)r   r    r   F)rr   timestepr   	text_lensr   r   latentgh㈵>)r   r   r      uint8)imagesrevised_prompts)8_execution_devicer   r   r1   
ValueErrorr)   rl   rm   r   r   rc   listr&   rs   rz   r7   r'   in_channelsr   ro   linspacer   set_timestepsr   r   progress_barr   	timestepscatfullr   chunkstepprev_samplelocalspopupdater   bnrunning_meanr   rW   sqrtrunning_varr   r[   clampcpur   floatnumpyr   	fromarrayastypemaybe_free_model_hooksr   )1r*   r:   r   r=   r<   r   r3   re   r   r   r   r   r   r   r   r   r   r;   r   r   ru   
batch_sizetotal_batch_sizer   ry   rt   uncond_text_hiddenslatent_hlatent_wlatent_channelsr   cfg_text_hiddensr   r   r   r   r   latent_model_inputt_batchpredpred_uncond	pred_condcallback_kwargskcallback_outputsbn_meanbn_stdr   imgs1                                                    r,   __call__ErnieImagePipeline.__call__   s   d ''  &&- >m3OPP-";abb )))Q.%:O:O2OST2TEdF[F[E\]^^ &#&&  04&TWW-@TEVEVEbcijci^_d221fE2YciFj"6lOVJ]+J%(== " Oos++./*<O:-PQ[P\\]^__ $'4[}!eDY>ZA>ZA}L[L--ff>STL ++%12H&o2HQRWXmRnQqRnq2H#&o#&*&8&8&Rg&h# 222111**11== ?"!?HhG#	G S*=*AB$$F3BK$G ++#$784;MM+"nn)&SWScScSjSjSvSv - 
) %89\!$..":":;133).G3E1)M&#jj*:Q*>)@!&&(SYafgG)0&#jj*:)<affhv]bcG ''"4$%' % (   33-1ZZqZ-A*K&9{;R)SSD ..--dAw?KK (3&(O?-3Xa[* @';D!Q'X$.229gFG##%C < :H ("N ((++**//2q!<??GDHHKK3388B1ELMPPQWXF"W, **73 e<Q? ,,r1%)Q.%%aAq1779??A%NTUfseoosSy&8&8&ABfFU 	##%9'vWWs k& \ 'p> :9p Vs%   WW
(WEW-2W(
W%)r1   r)   )NN)r9   r9   Ng333333?gffffff?)r   ),__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seq_optional_components_callback_tensor_inputsr   r   r   r
   r   r   r	   r#   propertyr3   r7   ro   rq   rm   r;   r   r   rc   r   r   Tensorrz   staticmethodr   r   r   r   	Generatorr   FloatTensorboolr   dictr   __static_attributes____classcell__)r+   s   @r,   r   r   #   s    A .1(k .204u1u  u  	u
 !u 3u )*u }-u u, $ $ * * ]]_
 '+ 'Y'Y 'Y 	'Y
 'Y  }'Y 'Y 'Y 
'Y 'YR ]]_
 &'	'c49n%' '  #	'
 
ell	' 'R 95<< 9ELL 9 9 8U\\ 8ell 8 8 U\\ 2 ELL QVQ\Q\ kn    ]]_ 37;=#% #%&/3*.8<AE  KO9B#AXsDI~./AX "%T#Y"78AX 	AX
 AX !AX AX  #AX EOO,AX %,,'AX E--.5AX !%U%6%6 7$ >AX AX AX 'xc40@$0F'GHAX  -1I!AX" #AX AXr.   r   )r   rS   typingr   r   r   r   ro   PILr   transformersr   r	   r
   loadersr   modelsr   models.transformersr   pipelines.pipeline_utilsr   
schedulersr   utils.torch_utilsr   pipeline_outputr   r   rR   r.   r,   <module>r     sJ     2 2   G G 0 ( ? 9 9 - 5eX*,E eXr.   