
    
3jnY                     L   S SK Jr  S SKrS SKrS SKJs  Jr  SSK	J
r
Jr  SSKJr  SSKJr  \ " S S	\5      5       rS
\R"                  S\S\R&                  4S jrS\R&                  S\R*                  S-  S\R&                  4S jrSS\4S jjrSS\4S jjr " S S\\
5      rg)    )	dataclassN   )ConfigMixinregister_to_config)
BaseOutput   )SchedulerMixinc                   8    \ rS rSr% Sr\R                  \S'   Srg)VQDiffusionSchedulerOutput   a  
Output class for the scheduler's step function output.

Args:
    prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the
        denoising loop.
prev_sample N)	__name__
__module____qualname____firstlineno____doc__torch
LongTensor__annotations____static_attributes__r       f/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/schedulers/scheduling_vq_diffusion.pyr   r      s     !!!r   r   xnum_classesreturnc                     [         R                  " X5      nUR                  SSS5      n[        R                  " UR                  5       R                  SS95      nU$ )aw  
Convert batch of vector of class indices into batch of log onehot vectors

Args:
    x (`torch.LongTensor` of shape `(batch size, vector length)`):
        Batch of class indices

    num_classes (`int`):
        number of classes to be used for the onehot vectors

Returns:
    `torch.Tensor` of shape `(batch size, num classes, vector length)`:
        Log onehot vectors
r   r   r   KH9)min)Fone_hotpermuter   logfloatclamp)r   r   x_onehotlog_xs       r   index_to_log_onehotr(   (   sN     yy(H1a(HIIhnn&,,,78ELr   logits	generatorc                     [         R                  " U R                  U R                  US9n[         R                  " [         R                  " US-   5      * S-   5      * nX0-   nU$ )z 
Apply gumbel noise to `logits`
)devicer*   r   )r   randshaper,   r#   )r)   r*   uniformgumbel_noisenoiseds        r   gumbel_noisedr2   =   sQ     jjfmmyQGIIuyy599EABBL"FMr   num_diffusion_timestepsc                     [         R                  " SU 5      U S-
  -  X!-
  -  U-   n[         R                  " S/U45      nUSS USS -  n[         R                  " USS S/45      nXC4$ )zB
Cumulative and non-cumulative alpha schedules.

See section 4.1.
r   r   Nnparangeconcatenate)r3   alpha_cum_startalpha_cum_endattats        r   alpha_schedulesr>   G   s     			!,-1H11LMQ^Qpq
	  ..1#s
$C	QR3s8	B
..#ab'A3
(C7Nr   c                     [         R                  " SU 5      U S-
  -  X!-
  -  U-   n[         R                  " S/U45      nSU-
  nUSS USS -  nSU-
  n[         R                  " USS S/45      nXc4$ )zB
Cumulative and non-cumulative gamma schedules.

See section 4.1.
r   r   Nr5   r6   )r3   gamma_cum_startgamma_cum_endcttone_minus_cttone_minus_ctcts          r   gamma_schedulesrF   W   s     			!,-1H11LMQ^Qpq
	  ..1#s
$CGM $}Sb'99L	
\	B
..#ab'A3
(C7Nr   c                   n   \ rS rSrSrSr\     SS\S\S\S\S\S	\4S
 jj5       r	SS\S\
\R                  -  4S jjr  S S\R                  S\R                  S\R                   S\R"                  S-  S\S\\-  4S jjrS rS\R                  S\R                   S\R                  S\4S jrS rSrg)!VQDiffusionScheduleri   a  
A scheduler for vector quantized diffusion.

This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
methods the library implements for all schedulers such as loading and saving.

Args:
    num_vec_classes (`int`):
        The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
        latent pixel.
    num_train_timesteps (`int`, defaults to 100):
        The number of diffusion steps to train the model.
    alpha_cum_start (`float`, defaults to 0.99999):
        The starting cumulative alpha value.
    alpha_cum_end (`float`, defaults to 0.00009):
        The ending cumulative alpha value.
    gamma_cum_start (`float`, defaults to 0.00009):
        The starting cumulative gamma value.
    gamma_cum_end (`float`, defaults to 0.99999):
        The ending cumulative gamma value.
r   num_vec_classesnum_train_timestepsr:   r;   r@   rA   c                    Xl         U R                   S-
  U l        [        X#US9u  px[        X%US9u  pU R                   S-
  nSU-
  U	-
  U-  nSU-
  U
-
  U-  n[        R
                  " UR                  S5      5      n[        R
                  " UR                  S5      5      n[        R
                  " U	R                  S5      5      n	[        R                  " U5      n[        R                  " U5      n[        R                  " U	5      n[        R
                  " UR                  S5      5      n[        R
                  " UR                  S5      5      n[        R
                  " U
R                  S5      5      n
[        R                  " U5      n[        R                  " U5      n[        R                  " U
5      nUR                  5       U l	        UR                  5       U l
        UR                  5       U l        UR                  5       U l        UR                  5       U l        UR                  5       U l        S U l        [        R                   " ["        R$                  " SU5      S S S2   R'                  5       5      U l        g )Nr   )r:   r;   )r@   rA   float64r   r5   )	num_embed
mask_classr>   rF   r   tensorastyper#   r$   log_atlog_btlog_ctlog_cumprod_atlog_cumprod_btlog_cumprod_ctnum_inference_steps
from_numpyr7   r8   copy	timesteps)selfrJ   rK   r:   r;   r@   rA   r=   r<   rE   rB   num_non_mask_classesbtbttrR   rS   rT   rU   rV   rW   s                       r   __init__VQDiffusionScheduler.__init__   s    ) ..1,!"5fst!"5fst#~~1"frk113w} 44\\"))I./\\"))I./\\"))I./222ll3::i01ll3::i01ll3::i01333llnllnlln,224,224,224 $( ))"))A7J*KDbD*Q*V*V*XYr   NrX   r,   c                 N   Xl         [        R                  " SU R                   5      SSS2   R                  5       n[        R
                  " U5      R                  U5      U l        U R                  R                  U5      U l        U R                  R                  U5      U l	        U R                  R                  U5      U l
        U R                  R                  U5      U l        U R                  R                  U5      U l        U R                  R                  U5      U l        g)a  
Sets the discrete timesteps used for the diffusion chain (to be run before inference).

Args:
    num_inference_steps (`int`):
        The number of diffusion steps used when generating samples with a pre-trained model.
    device (`str` or `torch.device`, *optional*):
        The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved
        to.
r   Nr5   )rX   r7   r8   rZ   r   rY   tor[   rR   rS   rT   rU   rV   rW   )r\   rX   r,   r[   s       r   set_timesteps"VQDiffusionScheduler.set_timesteps   s     $7 IIa!9!9:4R4@EEG	)))477?kknnV,kknnV,kknnV,"1144V<"1144V<"1144V<r   model_outputtimestepsampler*   return_dictr   c                     US:X  a  UnOU R                  XU5      n[        Xd5      nUR                  SS9nU(       d  U4$ [        US9$ )a+  
Predict the sample from the previous timestep by the reverse transition distribution. See
[`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer.

Args:
    log_p_x_0: (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
        The log probabilities for the predicted classes of the initial latent pixels. Does not include a
        prediction for the masked class as the initial unnoised image cannot be masked.
    t (`torch.long`):
        The timestep that determines which transition matrices are used.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    generator (`torch.Generator`, or `None`):
        A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
        `tuple`.

Returns:
    [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
        If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
        returned, otherwise a tuple is returned where the first element is the sample tensor.
r   r   dim)r   )q_posteriorr2   argmaxr   )r\   rf   rg   rh   r*   ri   log_p_x_t_min_1	x_t_min_1s           r   stepVQDiffusionScheduler.step   sU    > q=*O"..|XNO'C#**q*1	<)i@@r   c                     [        X R                  5      nU R                  X2USS9nU R                  X2USS9nX-
  n[        R                  " USSS9nXx-
  nU R                  XsS-
  5      nXv-   U-   n	U	$ )ah  
Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

```
p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
```

Args:
    log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`):
        The log probabilities for the predicted classes of the initial latent pixels. Does not include a
        prediction for the masked class as the initial unnoised image cannot be masked.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    t (`torch.Long`):
        The timestep that determines which transition matrix is used.

Returns:
    `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`:
        The log probabilities for the predicted classes of the image at timestep `t-1`.
T)tx_tlog_onehot_x_t
cumulativeFr   )rl   keepdim)r(   rN   $log_Q_t_transitioning_to_known_classr   	logsumexpapply_cumulative_transitions)
r\   	log_p_x_0ru   rt   rv   log_q_x_t_given_x_0log_q_t_given_x_t_min_1qq_log_sum_expro   s
             r   rm    VQDiffusionScheduler.q_posterior   s    * -S..A"GGD H 
 #'"K"KE #L #
 + q$?  --aQ7 5E\ r   rt   ru   rv   rw   c                   U(       a.  U R                   U   nU R                  U   nU R                  U   nO-U R                  U   nU R                  U   nU R
                  U   nU(       d  USS2SSS24   R                  S5      nUSS2SS2SS24   nX5-   R                  U5      n	X R                  :H  n
U
R                  S5      R                  SU R                  S-
  S5      n
XyU
'   U(       d  [        R                  " U	W4SS9n	U	$ )a  
Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
latent pixel in `x_t`.

Args:
    t (`torch.Long`):
        The timestep that determines which transition matrix is used.
    x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
        The classes of each latent pixel at time `t`.
    log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`):
        The log one-hot vectors of `x_t`.
    cumulative (`bool`):
        If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
        `True`, the cumulative transition matrix `0`->`t` is used.

Returns:
    `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`:
        Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability
        transition matrix.

        When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be
        masked.

        Where:
        - `q_n` is the probability distribution for the forward process of the `n`th latent pixel.
        - C_0 is a class of a latent pixel embedding
        - C_k is the class of the masked latent pixel

        non-cumulative result (omitting logarithms):
        ```
        q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0)
                  .      .                     .
                  .               .            .
                  .                      .     .
        q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k)
        ```

        cumulative result (omitting logarithms):
        ```
        q_0_cumulative(x_t | x_0 = C_0)    ...  q_n_cumulative(x_t | x_0 = C_0)
                  .               .                          .
                  .                        .                 .
                  .                               .          .
        q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1})
        ```
Nr5   r   rk   )rU   rV   rW   rR   rS   rT   	unsqueeze	logaddexprO   expandrN   r   cat)r\   rt   ru   rv   rw   abc(log_onehot_x_t_transitioning_from_maskedlog_Q_tmask_class_masks              r   ry   9VQDiffusionScheduler.log_Q_t_transitioning_to_known_classc  s   b ##A&A##A&A##A&AAAAAAA 8FaQh7O7Y7YZ[7\4
 (3B3	2 "%003 0)33A6==b$..STBTVXY#$ ii*R SYZ[Gr   c                    UR                   S   nU R                  U   nU R                  U   nU R                  U   nUR                   S   nUR	                  USU5      nX-   R                  U5      n[        R                  " X4SS9nU$ )Nr   r   r   rk   )r.   rU   rV   rW   r   r   r   r   )r\   r   rt   bszr   r   r   num_latent_pixelss           r   r{   1VQDiffusionScheduler.apply_cumulative_transitions  s    ggaj"""GGAJHHS!./Ua IIqf!$r   )
rR   rS   rT   rU   rV   rW   rO   rN   rX   r[   )d   wJ??̔>r   r   )N)NT)r   r   r   r   r   orderr   intr$   r`   strr   r,   rd   Tensorlongr   	Generatorboolr   tuplerq   rm   ry   r{   r   r   r   r   rH   rH   i   s@   , E $'!('!)&,Z,Z !,Z 	,Z
 ,Z ,Z ,Z ,Z\= =cELL>P =6 -1 +All+A **+A   	+A
 ??T)+A +A 
$e	++AZm^aIIa$)$4$4aFKlla`daFr   rH   )r   r   )r   r   )dataclassesr   numpyr7   r   torch.nn.functionalnn
functionalr    configuration_utilsr   r   utilsr   scheduling_utilsr	   r   r   r   r   r(   r   r2   r>   rF   rH   r   r   r   <module>r      s    "     A  , 
" 
" 
"5++ # %,, *%,, 5??T3I ell S  S $i>; ir   