
    
3j\              
          S SK r S SKJr  S SKrS SKrS SKJs  Jr	  S SK
Jr  S SKJrJrJrJr  SSKJrJr  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJ r   SSK!J"r"  SSK#J$r$  SSK%J&r&  \" 5       (       a  S SK'J(s  J)r*  Sr+OSr+\RX                  " \-5      r.Sr/S r0S r1S r2S S jr3S!S jr4S"S jr5    S#S\6S-  S\7\Rp                  -  S-  S\9\6   S-  S\9\:   S-  4S jjr; " S S\5      r<g)$    N)Callable)Image)	BertModelBertTokenizerQwen2TokenizerQwen2VLForConditionalGeneration   )MultiPipelineCallbacksPipelineCallback)VaeImageProcessor)AutoencoderKLMagvitEasyAnimateTransformer3DModel)DiffusionPipeline)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableloggingreplace_example_docstring)randn_tensor)VideoProcessor   )EasyAnimatePipelineOutputTFaw  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import EasyAnimateInpaintPipeline
        >>> from diffusers.pipelines.easyanimate.pipeline_easyanimate_inpaint import get_image_to_video_latent
        >>> from diffusers.utils import export_to_video, load_image

        >>> pipe = EasyAnimateInpaintPipeline.from_pretrained(
        ...     "alibaba-pai/EasyAnimateV5.1-12b-zh-InP-diffusers", torch_dtype=torch.bfloat16
        ... )
        >>> pipe.to("cuda")

        >>> prompt = "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
        >>> validation_image_start = load_image(
        ...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
        ... )

        >>> validation_image_end = None
        >>> sample_size = (448, 576)
        >>> num_frames = 49
        >>> input_video, input_video_mask = get_image_to_video_latent(
        ...     [validation_image_start], validation_image_end, num_frames, sample_size
        ... )

        >>> video = pipe(
        ...     prompt,
        ...     num_frames=num_frames,
        ...     negative_prompt="Twisted body, limb deformities, text subtitles, comics, stillness, ugliness, errors, garbled text.",
        ...     height=sample_size[0],
        ...     width=sample_size[1],
        ...     video=input_video,
        ...     mask_video=input_video_mask,
        ... )
        >>> export_to_video(video.frames[0], "output.mp4", fps=8)
        ```
c                    [        U [        R                  5      (       aI  [        R                  R                  R                  U R                  S5      USSS9R                  S5      n O[        U [        R                  5      (       a0  U R                  US   US   45      n [        R                  " U 5      n On[        U [        R                  5      (       aD  [        R                  " U 5      R                  US   US   45      n [        R                  " U 5      n O[        S5      e[        U [        R                  5      (       d8  [        R                  " U 5      R!                  SSS5      R#                  5       S-  n U $ )	z\
Preprocess a single image (PIL.Image, numpy.ndarray, or torch.Tensor) to a resized tensor.
r   bilinearFsizemodealign_cornersr   zKUnsupported input type. Expected PIL.Image, numpy.ndarray, or torch.Tensor.   g     o@)
isinstancetorchTensornn
functionalinterpolate	unsqueezesqueezer   resizenparrayndarray	fromarray
ValueError
from_numpypermutefloat)imagesample_sizes     v/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.pypreprocess_imager3   Z   s#    %&&##//OOA[zQV 0 

'!* 	 
E5;;	'	'k!nk!n=>	E2::	&	&&--{1~{1~.NOfgg eU\\**  '//1a8>>@5HL    c                 z   SnSnU GbW  [        U [        5      (       a  U  Vs/ s H  n[        Xc5      PM     nnO[        X5      n[        U[        5      (       a  [        R                  " U Vs/ s H"  ofR                  S5      R                  S5      PM$     snSS9n[        R                  " USS2SS2SS24   SSUSS/5      nXSS2SS2S[        U5      24'   O:[        R                  " UR                  S5      R                  S5      SSUSS/5      n[        R                  " USS2SS24   5      n[        U[        5      (       a  SUSS2SS2[        U5      S24'   OSUSS2SS2SS24'   Ub  [        U[        5      (       a  U Vs/ s H  n[        Xc5      PM     n	n[        R                  " U	 Vs/ s H"  ofR                  S5      R                  S5      PM$     snSS9n
XSS2SS2[        U
5      * S24'   SUSS2SS2[        U	5      * S24'   XE4$ [        X5      n	U	R                  S5      R                  S5      USS2SS2SS24'   SUSS2SS2SS24'   XE4$ U cC  [        R                  " SSX#S   US   /5      n[        R                  " SSX#S   US   /5      S-  nXE4$ s  snf s  snf s  snf s  snf )	z
Generate latent representations for video from start and end images. Inputs can be PIL.Image, numpy.ndarray, or
torch.Tensor.
Nr   r   r   dim   r	   )r   listr3   r    catr%   tilelen
zeros_likezerosones)validation_image_startvalidation_image_end
num_framesr1   input_videoinput_video_maskimgimage_startstart_video	image_end	end_videos              r2   get_image_to_video_latentrK   u   s   
 K),d33I_`I_#+C=I_K`K*+AOK k4(()):EF+3q!++A.+FK  **[Arr%:Q:qRS<TUK4?10K 0001**%%a(2215Az1a(K !++K2A2,>?k4((9<Q3{#3#556),Q12X&  +.55K_`K_C-c?K_	`!II>GHis]]1%//2iH	 8AAq3y>/"334<= AI'8!89 (( --AO	)2)<)<Q)?)I)I!)LAq"#I&./ Ars+ (( 
 	'kk1a^[QR^"TU ::q!ZQUV&XY\__((_ a G. aHs   J).)J.J37)J8c                    UnUnU u  pVXV-  nXtU-  :  a  Un[        [        XE-  U-  5      5      n	OUn	[        [        X6-  U-  5      5      n[        [        XH-
  S-  5      5      n
[        [        X9-
  S-  5      5      nX4X-   X-   44$ )Ng       @)intround)src	tgt_width
tgt_heighttwthhwrresize_heightresize_widthcrop_top	crop_lefts               r2   get_resize_crop_region_for_gridr[      s    	B	BDA	AG}5!,-E"&1*-.5",345HE2,345I 8#;Y=U"VVVr4   c                     UR                  [        [        SUR                  5      5      SS9nU R                  [        [        SU R                  5      5      SS9nXU-  -  nX%-  SU-
  U -  -   n U $ )a  
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
Flawed](https://huggingface.co/papers/2305.08891).

Args:
    noise_cfg (`torch.Tensor`):
        The predicted noise tensor for the guided diffusion process.
    noise_pred_text (`torch.Tensor`):
        The predicted noise tensor for the text-guided diffusion process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        A rescale factor applied to the noise predictions.

Returns:
    noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor.
r   T)r7   keepdim)stdr:   rangendim)	noise_cfgnoise_pred_textguidance_rescalestd_textstd_cfgnoise_pred_rescaleds         r2   rescale_noise_cfgrg      s{    " ""tE!_5I5I,J'KUY"ZHmmU1inn%= >mMG#''9: 6!>N:NR[9[[Ir4   c           	         UR                  5       nU(       a  [        USS  5      nSUS'   [        R                  " U S S 2S S 2SS2S S 2S S 24   USSS9n[        USS  5      nUS   S-
  US'   US   S:w  aA  [        R                  " U S S 2S S 2SS 2S S 2S S 24   USSS9n[        R
                  " XV/SS9nU$ Un U$ [        USS  5      n[        R                  " XSSS9nU$ )Nr   r   r   	trilinearFr   r6   )r   r:   Fr$   r    r;   )masklatentprocess_first_frame_onlylatent_sizetarget_sizefirst_frame_resizedremaining_frames_resizedresized_masks           r2   resize_maskrs      s   ++-K;qr?+AmmAqsAq!+UZ
 ;qr?+$Q!+Aq>Q'(}}Q12q!^$;[X]($ !99&9%TZ[\L 	 /L  ;qr?+}}T+]bcr4   c                    Ucm  [         R                  " SSU R                  S   4S9R                  U R                  5      n[         R
                  " U5      R                  U R                  5      nOK[         R                  " U R                  S   45      R                  U R                  U R                  5      U-  nUbF  [         R                  " U R                  5       X R                  U R                  S9US S 2S S S S 4   -  nO#[         R                  " U 5      US S 2S S S S 4   -  n[         R                  " U S:H  [         R                  " U 5      U5      nX-   n U $ )Ng            ?r   )meanr^   r   )	generatordtypedevicer9   )r    normalshapetory   exprx   r@   randnr   
randn_likewherer>   )r0   ratiorw   sigmaimage_noises        r2   add_noise_to_reference_videor      s   }$Cu{{1~6GHKKELLY		% ##EKK0

EKKN,-00u{{KeSKK

	UZUaUabAtT4-./ 	
 &&u-atT46O0PP++erk5+;+;E+BKPKELr4   num_inference_stepsry   	timestepssigmasc                    Ub  Ub  [        S5      eUb  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
X2S.UD6  U R                  n[        U5      nX14$ Ub  S[        [        R                  " U R                  5      R
                  R                  5       5      ;   nU(       d  [        SU R                   S35      eU R                  " S
XBS.UD6  U R                  n[        U5      nX14$ U R                  " U4S	U0UD6  U R                  nX14$ )a  
Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

Args:
    scheduler (`SchedulerMixin`):
        The scheduler to get timesteps from.
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
        must be `None`.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
    timesteps (`list[int]`, *optional*):
        Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
        `num_inference_steps` and `sigmas` must be `None`.
    sigmas (`list[float]`, *optional*):
        Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
        `num_inference_steps` and `timesteps` must be `None`.

Returns:
    `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
    second element is the number of inference steps.
zYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   ry   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   ry   ry    )
r,   setinspect	signatureset_timesteps
parameterskeys	__class__r   r=   )	schedulerr   ry   r   r   kwargsaccepts_timestepsaccept_sigmass           r2   retrieve_timestepsr     s}   > !3tuu'3w/@/@AXAX/Y/d/d/i/i/k+ll .y/B/B.C Da b  	M)MfM''	!)n )) 
	 C(9(9):Q:Q(R(](](b(b(d$ee.y/B/B.C D_ `  	GvGG''	!)n )) 	 3MFMfM''	))r4   c            8       T  ^  \ rS rSrSrSr/ SQrS\S\\	-  S\
\-  S\S	\4
U 4S
 jjr          SAS\\\   -  S\S\S\\\   -  S-  S\R(                  S-  S\R(                  S-  S\R(                  S-  S\R(                  S-  S\R*                  S-  S\R,                  S-  S\4S jjrS r      SBS jrS rS r      SCS jr\S 5       r\S  5       r\S! 5       r \S" 5       r!\S# 5       r"\RF                  " 5       \$" \%5      SS$SSSS%S%S&S'SSS(SSSSSSS)SSS*/S(S+S,S4S\\\   -  S-\S-  S.\RL                  S/\RL                  S0\RL                  S1\S-  S2\S-  S3\S-  S4\'S-  S\\\   -  S-  S\S-  S5\'S-  S6\RP                  \\RP                     -  S-  S*\R(                  S-  S\R(                  S-  S\R(                  S-  S\R(                  S-  S\R(                  S-  S7\S-  S8\S9\)\\/S4   \*-  \+-  S-  S:\\   S;\'S<\'S=\'S>\\   S-  44S? jj5       5       r,S@r-U =r.$ )DEasyAnimateInpaintPipelineiJ  a~  
Pipeline for text-to-video generation using EasyAnimate.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

EasyAnimate uses one text encoder [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.

Args:
    vae ([`AutoencoderKLMagvit`]):
        Variational Auto-Encoder (VAE) Model to encode and decode video to and from latent representations.
    text_encoder (`~transformers.Qwen2VLForConditionalGeneration`, `~transformers.BertModel` | None):
        EasyAnimate uses [qwen2 vl](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) in V5.1.
    tokenizer (`~transformers.Qwen2Tokenizer`, `~transformers.BertTokenizer` | None):
        A `Qwen2Tokenizer` or `BertTokenizer` to tokenize text.
    transformer ([`EasyAnimateTransformer3DModel`]):
        The EasyAnimate model designed by EasyAnimate Team.
    scheduler ([`FlowMatchEulerDiscreteScheduler`]):
        A scheduler to be used in combination with EasyAnimate to denoise the encoded image latents.
ztext_encoder->transformer->vae)latentsprompt_embedsnegative_prompt_embedsvaetext_encoder	tokenizertransformerr   c                   > [         TU ]  5         U R                  UUUUUS9  [        U SS 5      b   U R                  R
                  R                  OSU l        [        U SS 5      b  U R                  R                  OSU l	        [        U SS 5      b  U R                  R                  OSU l        [        U R                  S9U l        [        U R                  SSSS	9U l        [        U R                  S9U l        g )
N)r   r   r   r   r   r   Tr         )vae_scale_factorF)r   do_normalizedo_binarizedo_convert_grayscale)super__init__register_modulesgetattrr   configenable_text_attention_maskr   spatial_compression_ratiovae_spatial_compression_ratiotemporal_compression_ratiovae_temporal_compression_ratior   image_processormask_processorr   video_processor)selfr   r   r   r   r   r   s         r2   r   #EasyAnimateInpaintPipeline.__init__c  s     	%# 	 	
 t]D1= ##>> 	' 3:$t2L2XDHH..^_ 	* 4;43M3YDHH//_` 	+  1$BdBde/!??!%	
  .t?a?abr4   r   TNpromptnum_images_per_promptdo_classifier_free_guidancenegative_promptr   r   prompt_attention_masknegative_prompt_attention_maskry   rx   max_sequence_lengthc           
         U
=(       d    U R                   R                  n
U	=(       d    U R                   R                  n	Ub  [        U[        5      (       a  SnO3Ub!  [        U[
        5      (       a  [        U5      nOUR                  S   nUGc  [        U[        5      (       a  SSUS./S./nOU Vs/ s H  nSSUS./S.PM     nnU Vs/ s H  oR                  R                  U/SSS	9PM      nnU R                  US
USSSSS9nUR                  U R                   R                  5      nUR                  nUR                  nU R                  (       a  U R                  UUSS9R                  S   nO[        S5      eUR!                  US5      nUR                  XS9nUR                  u  nnnUR!                  SUS5      nUR#                  UU-  US5      nUR                  U	S9nU(       Ga
  UGc  Ub   [        U[        5      (       a  SSUS./S./nOU Vs/ s H  nSSUS./S.PM     nnU Vs/ s H  oR                  R                  U/SSS	9PM      nnU R                  US
USSSSS9nUR                  U R                   R                  5      nUR                  nUR                  nU R                  (       a  U R                  UUSS9R                  S   nO[        S5      eUR!                  US5      nU(       aU  UR                  S   nUR                  XS9nUR!                  SUS5      nUR#                  X-  US5      nUR                  U	S9nXVXx4$ s  snf s  snf s  snf s  snf )as  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `list[str]`, *optional*):
        prompt to be encoded
    device: (`torch.device`):
        torch device
    dtype (`torch.dtype`):
        torch dtype
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    do_classifier_free_guidance (`bool`):
        whether to use classifier free guidance or not
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the prompt. Required when `prompt_embeds` is passed directly.
    negative_prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the negative prompt. Required when `negative_prompt_embeds` is passed directly.
    max_sequence_length (`int`, *optional*): maximum sequence length to use for the prompt.
r   r   usertext)typer   )rolecontentFT)tokenizeadd_generation_prompt
max_lengthrightpt)r   paddingr   
truncationreturn_attention_maskpadding_sidereturn_tensors)	input_idsattention_maskoutput_hidden_stateszLLM needs attention_mask)rx   ry   r9   ry   )r   rx   ry   r   strr:   r=   r{   r   apply_chat_templater|   r   r   r   hidden_statesr,   repeatview)r   r   r   r   r   r   r   r   r   ry   rx   r   
batch_sizemessages_promptmr   text_inputstext_input_idsbs_embedseq_len__negative_prompts                          r2   encode_prompt(EasyAnimateInpaintPipeline.encode_prompt  s   Z 0**0034,,33*VS"9"9JJvt$<$<VJ&,,Q/J &#&& !'-3V$D#E $*
 $* !'-3W$E#F $*   nvmuhi22A3^b2cmu   ..$.&*$# ) K &..):):)A)ABK(22N$/$>$>!.. $ 1 1,=Rim !2 !-!$ !!;<<$9$@$@AVXY$Z!%((u(D,22'1%,,Q0EqI%**86K+KWVXY 5 8 8 8 G '+A+I*z/3/O/O !'-3_$M#N -<
 -<( !'-3=M$N#O -<   nvmuhi22A3^b2cmu   ..$.&*$# ) K &..):):)A)ABK(22N-8-G-G*..)-):):,#A)- *; *  -	*$& !!;<<-K-R-RShjk-l*&,2215G%;%>%>U%>%Z"%;%B%B1F[]^%_"%;%@%@Acelnp%q"-K-N-NV\-N-]*6KkkAXs   5M%M"M'7%M,c                 n   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   n0 nU(       a  X$S'   S[        [        R                  " U R                  R                  5      R
                  R                  5       5      ;   nU(       a  XS'   U$ )Netarw   )r   r   r   r   stepr   r   )r   rw   r   accepts_etaextra_step_kwargsaccepts_generators         r2   prepare_extra_step_kwargs4EasyAnimateInpaintPipeline.prepare_extra_step_kwargs-  s     s7#4#4T^^5H5H#I#T#T#Y#Y#[\\'*e$ (3w/@/@ATAT/U/`/`/e/e/g+hh-6k*  r4   c
           
        ^  US-  S:w  d	  US-  S:w  a  [        SU SU S35      eU	bW  [        U 4S jU	 5       5      (       d=  [        ST R                   SU	 V
s/ s H  oT R                  ;  d  M  U
PM     sn
 35      eUb  Ub  [        S	U S
U S35      eUc  Uc  [        S5      eUbA  [        U[        5      (       d,  [        U[
        5      (       d  [        S[        U5       35      eUb  Uc  [        S5      eUb  Ub  [        SU SU S35      eUb  Uc  [        S5      eUbE  UbA  UR                  UR                  :w  a&  [        SUR                   SUR                   S35      eg g g s  sn
f )N   r   z8`height` and `width` have to be divisible by 16 but are z and .c              3   @   >#    U  H  oTR                   ;   v   M     g 7fN)_callback_tensor_inputs).0kr   s     r2   	<genexpr>:EasyAnimateInpaintPipeline.check_inputs.<locals>.<genexpr>M  s      F
7Y!---7Ys   z2`callback_on_step_end_tensor_inputs` has to be in z, but found zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.z'Cannot forward both `negative_prompt`: z and `negative_prompt_embeds`: zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` )r,   allr   r   r   r:   r   r{   )r   r   heightwidthr   r   r   r   r   "callback_on_step_end_tensor_inputsr   s   `          r2   check_inputs'EasyAnimateInpaintPipeline.check_inputs>  s4    B;!urzQWX^W__dejdkklmnn-9# F
7YF
 C
 C
 DTEaEaDbbn  |^  pH  |^vw  ko  kG  kG  bGpq  |^  pH  oI  J  -";08N}o ^0 0  ^ 5w  FC)@)@TZ\`IaIaQRVW]R^Q_`aa$)>)Fdee&+A+M9/9J K*++]_ 
 "-2P2Xvww$)?)K""&<&B&BB --:-@-@,A B.445Q8  C *L$7 pHs   E01E0c                 N   [        [        X-  5      U5      n[        X-
  S5      nU R                  R                  XPR                  R
                  -  S  n[        U R                  S5      (       a1  U R                  R                  XPR                  R
                  -  5        XaU-
  4$ )Nr   set_begin_index)minrM   maxr   r   orderhasattrr   )r   r   strengthry   init_timestept_startr   s          r2   get_timesteps(EasyAnimateInpaintPipeline.get_timestepsu  s    C 3 >?ATU)91=NN,,W~~7K7K-K-MN	4>>#455NN**7^^5I5I+IJ777r4   c                 `   Ub  UR                  XvS9n/ nSn[        SUR                  S   U5       HH  nXX-    nU R                  R	                  U5      S   nUR                  5       nUR                  U5        MJ     [        R                  " USS9nXR                  R                  R                  -  nUb  UR                  XvS9nU R                  R                  R                  (       a
  [        X*US9n/ nSn[        SUR                  S   U5       HH  nX-X-    nU R                  R	                  U5      S   nUR                  5       nUR                  U5        MJ     [        R                  " USS9nUU R                  R                  R                  -  nUR                  XvS9nUU4$ S nUU4$ )Nry   rx   r   r   r6   )r   rw   )r|   r_   r{   r   encoder   appendr    r;   r   scaling_factorr   add_noise_in_inpaint_modelr   )r   rk   masked_imager   r   r   rx   ry   rw   r   noise_aug_strengthnew_maskbsimask_bsnew_mask_pixel_valuesmask_pixel_values_bsmasked_image_latentss                     r2   prepare_mask_latents/EasyAnimateInpaintPipeline.prepare_mask_latents  s     77&76DHB1djjmR016*((//'215!,,.(	 1
 99X1-D((//888D#'??&?FL&&AA; i  %'!B1l003R8'3'?$'+xx7K'LQ'O$';'@'@'B$%,,-AB	 9
 $)99-B#J #7$((//:X:X#X  $8#:#:&#:#V  ))) $( )))r4   c                 (   UUUS-
  U R                   -  S-   X0R                  -  X@R                  -  4n[        U[        5      (       a*  [	        U5      U:w  a  [        S[	        U5       SU S35      eU(       d
  U	c  U(       d  U
R                  XvS9n
Sn/ n[        SU
R                  S   U5       HJ  nU
UUU-    nU R                  R                  U5      S   nUR                  5       nUR                  U5        ML     [        R                  " USS9n
XR                  R                  R                   -  n
U
R#                  XR                  S   -  SSSS5      nUR                  XvS9nU	c  [%        XXvS9n[        U R&                  [(        5      (       a'  U(       a  UOU R&                  R+                  WUU5      n	O&U(       a  UOU R&                  R-                  WUU5      n	[/        U R&                  S	5      (       a!  U(       a  XR&                  R0                  -  OU	n	OW[/        U R&                  S	5      (       a+  U	R                  U5      nUU R&                  R0                  -  n	OU	R                  U5      n	U	4nU(       a  UW4-  nU(       a  UW4-  nU$ )
Nr   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.r  r   r6   )rw   ry   rx   init_noise_sigma)r   r   r   r:   r=   r,   r|   r_   r{   r   r  sampler  r    r;   r   r	  r   r   r   r   scale_noise	add_noiser   r  )r   r   num_channels_latentsr   r   rC   rx   ry   rw   r   videotimestepis_strength_maxreturn_noisereturn_video_latentsr{   r  	new_videor  video_bsvideo_latentsnoiseoutputss                          r2   prepare_latents*EasyAnimateInpaintPipeline.prepare_latents  s_   $  !^ C CCaG888777
 i&&3y>Z+GA#i.AQ R&<'gi 
  GOOHHFH8EBI1ekk!nb1 QV,88??84Q7#??,  *	 2
 IIiQ/EHHOO:::E!LL{{1~)Eq!QPQRM),,F,HM? FXE$..*IJJ#2%8R8RS`bjlq8r#2%8P8PQ^`ego8pt~~'9::GV'NN$C$CC\ct~~'9::

6*$.."A"AA!**V, *xG''Gr4   c                     U R                   $ r   _guidance_scaler   s    r2   guidance_scale)EasyAnimateInpaintPipeline.guidance_scale  s    ###r4   c                     U R                   $ r   )_guidance_rescaler+  s    r2   rc   +EasyAnimateInpaintPipeline.guidance_rescale  s    %%%r4   c                      U R                   S:  $ )Nr   r)  r+  s    r2   r   6EasyAnimateInpaintPipeline.do_classifier_free_guidance  s    ##a''r4   c                     U R                   $ r   )_num_timestepsr+  s    r2   num_timesteps(EasyAnimateInpaintPipeline.num_timesteps  s    """r4   c                     U R                   $ r   )
_interruptr+  s    r2   	interrupt$EasyAnimateInpaintPipeline.interrupt  s    r4   1   i   2   g      @        pilr         ?gޓZӬ?rC   r  
mask_videomasked_video_latentsr   r   r   r,  r   rw   output_typereturn_dictcallback_on_step_endr   rc   r   r  r   c                 j   [        U[        [        45      (       a  UR                  n[	        US-  S-  5      n[	        US-  S-  5      nU R                  UUUU
UUUUU5	        Xl        UU l        SU l        Ub  [        U[        5      (       a  SnO3Ub!  [        U[        5      (       a  [        U5      nOUR                  S   nU R                  nU R                  b  U R                  R                  nOU R                   R                  nU R#                  UUUUU R$                  U
UUUUS9
u  nnnn[&        (       a  SnOUn[        U R(                  [*        5      (       a  [-        U R(                  UUUSS9u  nnO[-        U R(                  UUU5      u  nnU R/                  UUUS	9u  nnUSS R1                  UU-  5      nUS
:H  n Ub  UR                  u  nn!nn"n#U R2                  R5                  UR7                  SSSSS5      R9                  UU-  U!U"U#5      UUS9n$U$R;                  [<        R>                  S9n$U$R9                  UUU!Xg5      R7                  SSSSS5      n$OSn$U R@                  RB                  RD                  n%U R                   RB                  RF                  n&U&U%:H  n'U RI                  UU-  U%UUUUUUUU$UU SU'S9n(U'(       a  U(u  nn)n*OU(u  nn)UGb  US:H  RK                  5       (       GaI  [<        RL                  " U5      R;                  UU5      n+U R                   RB                  RN                  (       a0  [<        RL                  " U5      SS2SS24   R;                  UU5      n,O&[<        RL                  " U5      R;                  UU5      n,[<        RL                  " U5      R;                  UU5      nU R$                  (       a  [<        RP                  " U,/S-  5      OU,n-U R$                  (       a  [<        RP                  " U/S-  5      OUn.[<        RP                  " U-U./SS9R;                  U5      n/GOTUR                  u  nn!nn"n#U RR                  R5                  UR7                  SSSSS5      R9                  UU-  U!U"U#5      UUS9n0U0R;                  [<        R>                  S9n0U0R9                  UUU!Xg5      R7                  SSSSS5      n0U&U%:w  Ga  [<        RT                  " U0/ SQ5      n1Uc)  U$U1S:  -  [<        RV                  " U$5      U1S:  -  S-  -   n2OUn2U R                   RB                  RN                  (       a  U RY                  SU2UUUUUUU R$                  US9
u  n3n[[        SU0-
  XPR@                  RB                  R\                  5      n,U,R;                  UU5      U R@                  RB                  R^                  -  n,O%U RY                  U1U2UUUUUUU R$                  US9
u  n,nU R$                  (       a  [<        RP                  " U,/S-  5      OU,n-U R$                  (       a  [<        RP                  " U/S-  5      OUn.[<        RP                  " U-U./SS9R;                  U5      n/OSn/[<        RT                  " U0SU%SSS/5      n+[`        Rb                  " U+URe                  5       SS SSS9R;                  UU5      n+GOU&U%:w  GaH  [<        RL                  " U5      R;                  UU5      n+U R                   RB                  RN                  (       a0  [<        RL                  " U5      SS2SS24   R;                  UU5      n,O&[<        RL                  " U5      R;                  UU5      n,[<        RL                  " U5      R;                  UU5      nU R$                  (       a  [<        RP                  " U,/S-  5      OU,n-U R$                  (       a  [<        RP                  " U/S-  5      OUn.[<        RP                  " U-U./SS9R;                  U5      n/Ou[<        RL                  " U$SS2SS24   5      n+[<        RT                  " U+SU%SSS/5      n+[`        Rb                  " U+URe                  5       SS SSS9R;                  UU5      n+Sn/U&U%:w  a  W,R                  S   n4UR                  S   n5U%U4-   U5-   U R                   RB                  RF                  :w  aV  [g        SU R                   RB                   SU R                   RB                  RF                   SU% SU4 SU5 S U%U5-   U4-    S!35      eU Ri                  X5      n6U R$                  (       a0  [<        RP                  " UU/5      n[<        RP                  " UU/5      nUR;                  US"9nUR;                  US"9n[        U5      XR(                  Rj                  -  -
  n7[        U5      U l6        U Ro                  US#9 n8[q        U5       GH  u  n9n:U Rr                  (       a  M  U R$                  (       a  [<        RP                  " U/S-  5      OUn;[u        U R(                  S$5      (       a  U R(                  Rw                  U;U:5      n;[<        Rx                  " U:/U;R                  S   -  US"9R;                  U;R                  S9n<U R!                  U;U<UU/SS%9S   n=U=Re                  5       S   U R@                  RB                  RD                  :w  a  U=R{                  SSS9u  n=n3U R$                  (       a  U=R{                  S5      u  n>n?U>U	U?U>-
  -  -   n=U R$                  (       a  US&:  a  [}        U=W?US'9n=U R(                  R~                  " U=U:U40 U6DS(S0D6S   nU&U%:X  a  W*n@U+nAU9[        U5      S-
  :  a  UU9S-      nB[        U R(                  [*        5      (       a3  U R(                  R                  W@[<        Rx                  " WB/U)5      5      n@O2U R(                  R                  W@U)[<        Rx                  " WB/5      5      n@SWA-
  W@-  UAU-  -   nUb\  0 nCU H  nD[        5       UD   WCUD'   M     U" U U9U:WC5      nEUER                  S)U5      nUER                  S*U5      nUER                  S+U5      nU9[        U5      S-
  :X  d)  U9S-   U7:  a0  U9S-   U R(                  Rj                  -  S:X  a  U8R                  5         [&        (       d  GM  [        R                  " 5         GM     SSS5        US,:X  d^  SU R@                  RB                  R^                  -  U-  nU R@                  R                  USS-9S   nU R                  R                  UUS.9nOUnU R                  5         U(       d  U4$ [        US/9$ ! , (       d  f       N= f)0ad  
The call function to the pipeline for generation with HunyuanDiT.

Examples:
    prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
    num_frames (`int`, *optional*):
        Length of the video to be generated in seconds. This parameter influences the number of frames and
        continuity of generated content.
    video (`torch.FloatTensor`, *optional*):
        A tensor representing an input video, which can be modified depending on the prompts provided.
    mask_video (`torch.FloatTensor`, *optional*):
        A tensor to specify areas of the video to be masked (omitted from generation).
    masked_video_latents (`torch.FloatTensor`, *optional*):
        Latents from masked portions of the video, utilized during image generation.
    height (`int`, *optional*):
        The height in pixels of the generated image or video frames.
    width (`int`, *optional*):
        The width in pixels of the generated image or video frames.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image but slower
        inference time. This parameter is modulated by `strength`.
    guidance_scale (`float`, *optional*, defaults to 5.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is effective when `guidance_scale > 1`.
    negative_prompt (`str` or `list[str]`, *optional*):
        The prompt or prompts to guide what to exclude in image generation. If not defined, you need to provide
        `negative_prompt_embeds`. This parameter is ignored when not using guidance (`guidance_scale < 1`).
    num_images_per_prompt (`int`, *optional*, defaults to 1):
        The number of images to generate per prompt.
    eta (`float`, *optional*, defaults to 0.0):
        A parameter defined in the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only applies to the
        [`~schedulers.DDIMScheduler`] and is ignored in other schedulers. It adjusts noise level during the
        inference process.
    generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) for setting
        random seeds which helps in making generation deterministic.
    latents (`torch.Tensor`, *optional*):
        A pre-computed latent representation which can be used to guide the generation process.
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
        provided, embeddings are generated from the `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings, aiding in fine-tuning what should not be represented in the
        outputs. If not provided, embeddings are generated from the `negative_prompt` argument.
    prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask guiding the focus of the model on specific parts of the prompt text. Required when using
        `prompt_embeds`.
    negative_prompt_attention_mask (`torch.Tensor`, *optional*):
        Attention mask for the negative prompt, needed when `negative_prompt_embeds` are used.
    output_type (`str`, *optional*, defaults to `"latent"`):
        The output format of the generated image. Choose between `PIL.Image` and `np.array` to define how you
        want the results to be formatted.
    return_dict (`bool`, *optional*, defaults to `True`):
        If set to `True`, a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] will be returned;
        otherwise, a tuple containing the generated images and safety flags will be returned.
    callback_on_step_end (`Callable[[int, int], None]`, `PipelineCallback`, `MultiPipelineCallbacks`,
    *optional*):
        A callback function (or a list of them) that will be executed at the end of each denoising step,
        allowing for custom processing during generation.
    callback_on_step_end_tensor_inputs (`list[str]`, *optional*):
        Specifies which tensor inputs should be included in the callback function. If not defined, all tensor
        inputs will be passed, facilitating enhanced logging or monitoring of the generation process.
    guidance_rescale (`float`, *optional*, defaults to 0.0):
        Rescale parameter for adjusting noise configuration based on guidance rescale. Based on findings from
        [Common Diffusion Noise Schedules and Sample Steps are
        Flawed](https://huggingface.co/papers/2305.08891).
    strength (`float`, *optional*, defaults to 1.0):
        Affects the overall styling or quality of the generated output. Values closer to 1 usually provide
        direct adherence to prompts.

Examples:
    # Example usage of the function for generating images based on prompts.

Returns:
    [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
        Returns either a structured output containing generated images and their metadata when `return_dict` is
        `True`, or a simpler tuple, where the first element is a list of generated images and the second
        element indicates if any of them contain "not-safe-for-work" (NSFW) content.
r   FNr   r   )
r   ry   rx   r   r   r   r   r   r   r   cpu)mu)r   r   ry   r?  r   r	   r   )r   r   )rx   T)r  r  r  r  r   r8   r6   )r   r	   r   r   r   ru   r9   )r  ri   r   zHIncorrect configuration settings! The config of `pipeline.transformer`: z	 expects z& but received `num_channels_latents`: z + `num_channels_mask`: z  + `num_channels_masked_image`: z = z[. Please verify the config of `pipeline.transformer` or your `mask_image` or `image` input.r   )totalscale_model_input)encoder_hidden_statesinpaint_latentsrC  r=  )rc   rC  r   r   r   rl   )rC  )r  rB  )frames)Lr   r   r
   tensor_inputsrM   r   r*  r/  r8  r   r:   r=   r{   _execution_devicer   rx   r   r   r   XLA_AVAILABLEr   r   r   r  r   r   
preprocessr.   reshaper|   r    float32r   r   latent_channelsin_channelsr&  r   r>   resize_inpaint_mask_directlyr;   r   r<   	ones_liker  rs   cache_mag_vaer	  rj   r$   r   r,   r   r   r4  progress_bar	enumerater9  r   rJ  tensorchunkrg   r   r  r  localspopupdatexm	mark_stepdecoder   postprocess_videomaybe_free_model_hooksr   )Fr   r   rC   r  r@  rA  r   r   r   r,  r   r   r   rw   r   r   r   r   r   rB  rC  rD  r   rc   r   r  r   r   ry   rx   timestep_devicelatent_timestepr  channelsheight_videowidth_video
init_videor  num_channels_transformerreturn_image_latentslatents_outputsr$  image_latentsrk   mask_latents
mask_inputmasked_video_latents_inputrL  mask_conditionmask_condition_tilemasked_videor   num_channels_masknum_channels_masked_imager   num_warmup_stepsrY  r  tlatent_model_inputt_expand
noise_prednoise_pred_uncondrb   init_latents_proper	init_masknoise_timestepcallback_kwargsr   callback_outputssF                                                                         r2   __call__#EasyAnimateInpaintPipeline.__call__  s   ` *-=?U,VWW1E1S1S. Vr\B&'ERK"$% 	"!*.
	
  .!1 *VS"9"9JJvt$<$<VJ&,,Q/J''(%%++E$$**E "7(,(H(H+'#9"7+I  
	
"!* =#O$Odnn&EFF-? 3_iTU.*I* .@ 3_i.*I* *.);); 3hv *< *
&	&
 $BQ-..z<Q/QR"c/JO++GJ*lK--88aAq!,44Z*5LhXdfqr 9 J
 $U]];J#++J
Hf\ddefhiklnoqrsJJ  $xx>>#'#3#3#:#:#F#F 7;OO .... $+!5 / 
   ,;)GUM,NGU !c!&&((''033FEB##**GG#(#3#3G#<QU#C#F#Fvu#UL#(#3#3G#<#?#?#NL','7'7'@'C'CFE'R$>B>^>^UYY~'9:dp
=A=]=]EII34q89cw + #())Z9S,TZ["\"_"_`e"f OYN^N^K
Hj,!%!4!4!?!?&&q!Q15=="Z/< " "@ " "0!2!2!2!G!/!7!7
JPXZ`!h!p!pq!Q" ,/CC*/**^_*U'+3&*=*CD#ooj9=PSV=VWZ\\] %
 (<''..KK262K2K (&"!!"% <</A 3L 3// (3.0DhhooFcFc( (4vu'EHfHf'f=A=V=V/(&"!!"% <</A >W >:&: CGBbBbL>A+=!>htJ  ;; 		#7"81"<=1 /
 ',ii=W0X^_&`&c&cdi&jO&*Ozz.16JAqRS2TU}}Trs0C+eijmmE (+??''033FEB##**GG#(#3#3G#<QU#C#F#Fvu#UL#(#3#3G#<#?#?#NL','7'7'@'C'CFE'R$>B>^>^UYY~'9:dp
=A=]=]EII34q89cw + #())Z9S,TZ["\"_"_`e"f''
1bqb5(9:zz$,@!Q(JK}}Trs0C+eijmmE #' $';; , 2 21 5(<(B(B1(E%$'88;TT##**667 !^_c_o_o_v_v^w x((//;;<<bcwbx y-->,??_`y_z.1JJM^^_ `UU  !::9J++!II'=}&MNM$)II/MOd.e$f! &(((7 5 8 8 8 G y>,?..BVBV,VV!)n%89\!),1>> BFAaAaUYYy1}%=gn"4>>+>??)-)I)IJ\^_)`& !<<.@.F.Fq.I(IRXY\\,22 ] 
 "--&*7$3 % .  
 ??$Q'488??+J+JJ$.$4$4QA$4$>MJ 339C9I9I!9L6%!2^YjGj5k!kJ338H38N!2:aq!rJ ..--j!WmHYmglmnop+/CC*7' $I3y>A--)21q5)9%dnn6UVV26..2L2L 3U\\>BRTY5Z3/ 37..2J2J 3UELL.IY<Z3/  !9}0CCiRYFYYG'3&(O?-3Xa[* @';D!Q'X$.229gFG$4$8$8-$XM-=-A-ABZ\r-s*I**A9I/IqSTuX\XfXfXlXlNlpqNq '') =LLNG - :L h&$((//8887BGHHOOGO?BE((::T_:`EE 	##%8O(66g :9s   .Lv$9v$$
v2)
r/  r*  r8  r4  r   r   r   r   r   r   )
r   TNNNNNNN   )NNNNNN)NNNTFF)/__name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqr   r   r   r   r   r   r   r   r   r   r:   rM   boolr    r!   ry   rx   r   r   r   r  r  r&  propertyr,  rc   r   r5  r9  no_gradr   EXAMPLE_DOC_STRINGFloatTensorr/   	Generatorr   r   r
   r  __static_attributes____classcell__)r   s   @r2   r   r   J  sJ   * =T$c $c 6	A$c "M1	$c
 3$c 3$cT &',026-16:59>B&*$(#&`ld3i`l  #`l &*	`l
 tCy4/`l ||d*`l !&t 3`l  %||d2`l ).t(;`l t#`l {{T!`l !`lF!, #"'++/4n	81*z "HT $ $ & & ( ( # #   ]]_12 #'!##'(,26 *,'*26,-DH'+-16:59>B"' nr9B"%$*&*7A7d3iA7 $JA7   	A7
 %%A7 $//A7 d
A7 TzA7 !4ZA7 A7 tCy4/A7  #TzA7 T\A7 ??T%//%::TAA7 $A7  ||d*!A7" !&t 3#A7$  %||d2%A7& ).t(;'A7( 4Z)A7* +A7, 'Sz4'78;KKNddgkk-A7. -1I/A70  1A72 3A74 "5A76 9t#7A7 3 A7r4   r   )r=  )T)NN)NNNN)=r   typingr   numpyr(   r    torch.nn.functionalr"   r#   rj   PILr   transformersr   r   r   r   	callbacksr
   r   r   r   modelsr   r   pipelines.pipeline_utilsr   
schedulersr   utilsr   r   r   utils.torch_utilsr   r   r   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelr`  rP  
get_loggerr  loggerr  r3   rK   r[   rg   rs   r   rM   r   ry   r:   r/   r   r   r   r4   r2   <module>r     s            B 0 H 9 9 O O - - 6 ))MM			H	%$ N6:)|W&64, '+(,"&!%8*t8* %,,%8* Cy4	8*
 K$8*vM7!2 M7r4   