
    
3jK7                     j   S SK r S SKrS SKJr  S SKrS SKJr  SSKJr  SSK	J
r
JrJr  \R                  " \5      rSrSrS	r\\-   rS
rSr\ " S S5      5       r " S S5      r " S S\5      rS\S\\\   \\   4   4S jrS\R                  R:                  S\4S jrS\R:                  S\S\4S jr g)    N)	dataclass   )logging   )HookRegistry	ModelHookStateManagertaylorseer_cache)z^blocks.*attnz^transformer_blocks.*attnz ^single_transformer_blocks.*attn)z"^temporal_transformer_blocks.*attn)z^[^.]*block[^.]*\.[^.]+$)z
^proj_out$c                       \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\S-  \S'   S	r
\\S
'   \R                  r\R                  S-  \S'   Sr\\   S-  \S'   Sr\\   S-  \S'   Sr\\S'   S\4S jrSrg)TaylorSeerCacheConfig   a  
Configuration for TaylorSeer cache. See: https://huggingface.co/papers/2503.06923

Attributes:
    cache_interval (`int`, defaults to `5`):
        The interval between full computation steps. After a full computation, the cached (predicted) outputs are
        reused for this many subsequent denoising steps before refreshing with a new full forward pass.

    disable_cache_before_step (`int`, defaults to `3`):
        The denoising step index before which caching is disabled, meaning full computation is performed for the
        initial steps (0 to disable_cache_before_step - 1) to gather data for Taylor series approximations. During
        these steps, Taylor factors are updated, but caching/predictions are not applied. Caching begins at this
        step.

    disable_cache_after_step (`int`, *optional*, defaults to `None`):
        The denoising step index after which caching is disabled. If set, for steps >= this value, all modules run
        full computations without predictions or state updates, ensuring accuracy in later stages if needed.

    max_order (`int`, defaults to `1`):
        The highest order in the Taylor series expansion for approximating module outputs. Higher orders provide
        better approximations but increase computation and memory usage.

    taylor_factors_dtype (`torch.dtype`, defaults to `torch.bfloat16`):
        Data type used for storing and computing Taylor series factors. Lower precision reduces memory but may
        affect stability; higher precision improves accuracy at the cost of more memory.

    skip_predict_identifiers (`list[str]`, *optional*, defaults to `None`):
        Regex patterns (using `re.fullmatch`) for module names to place as "skip" in "cache" mode. In this mode,
        the module computes fully during initial or refresh steps but returns a zero tensor (matching recorded
        shape) during prediction steps to skip computation cheaply.

    cache_identifiers (`list[str]`, *optional*, defaults to `None`):
        Regex patterns (using `re.fullmatch`) for module names to place in Taylor-series caching mode, where
        outputs are approximated and cached for reuse.

    use_lite_mode (`bool`, *optional*, defaults to `False`):
        Enables a lightweight TaylorSeer variant that minimizes memory usage by applying predefined patterns for
        skipping and caching (e.g., skipping blocks and caching projections). This overrides any custom
        `inactive_identifiers` or `active_identifiers`.

Notes:
    - Patterns are matched using `re.fullmatch` on the module name.
    - If `skip_predict_identifiers` or `cache_identifiers` are provided, only matching modules are hooked.
    - If neither is provided, all attention-like modules are hooked by default.

Example of inactive and active usage:

```py
def forward(x):
    x = self.module1(x)  # inactive module: returns zeros tensor based on shape recorded during full compute
    x = self.module2(x)  # active module: caches output here, avoiding recomputation of prior steps
    return x
```
   cache_interval   disable_cache_before_stepNdisable_cache_after_stepr   	max_ordertaylor_factors_dtypeskip_predict_identifierscache_identifiersFuse_lite_modereturnc                     SU R                    SU R                   SU R                   SU R                   SU R                   SU R
                   SU R                   SU R                   S	3$ )
Nz%TaylorSeerCacheConfig(cache_interval=z, disable_cache_before_step=z, disable_cache_after_step=z, max_order=z, taylor_factors_dtype=z, skip_predict_identifiers=z, cache_identifiers=z, use_lite_mode=))r   r   r   r   r   r   r   r   selfs    Z/home/wildlama/miniconda3/lib/python3.13/site-packages/diffusers/hooks/taylorseer_cache.py__repr__TaylorSeerCacheConfig.__repr__[   s    "112 3))-)G)G(H I((,(E(E'F G( )$$($=$=#> ?((,(E(E'F G!!%!7!7 8 9!//03
	
     )__name__
__module____qualname____firstlineno____doc__r   int__annotations__r   r   r   torchbfloat16r   dtyper   liststrr   r   boolr   __static_attributes__r!   r    r   r   r      s    5n NC%&s&+/cDj/Is/4~~%++,=15d3i$.5*.tCy4'.M4
# 
r    r   c                       \ rS rSr\R
                  SS4S\R                  S-  S\S\4S jjr	SS
 jr
S\\R                  S4   S	S4S jr\R                  R                  S	\\R                     4S j5       rSrg)TaylorSeerStatei   r   Fr   Nr   is_inactivec                 |    Xl         X l        X0l        SU l        S U l        0 U l        S U l        S U l        SU l        g )Nr!   )	r   r   r3   module_dtypeslast_update_steptaylor_factorsinactive_shapesdevicecurrent_step)r   r   r   r3   s       r   __init__TaylorSeerState.__init__j   sD     %9!"&68,0BDCG+/!#r    r   c                 J    SU l         S U l        0 U l        S U l        S U l        g )Nr5   )r;   r7   r8   r9   r:   r   s    r   resetTaylorSeerState.reset{   s)     $ #r    outputs.c           	         [        S U 5       5      U l        US   R                  U l        U R                  (       a  [        S U 5       5      U l        GO[        U5       GH
  u  p#SU0nU R                  S L nU(       d  U R                  U R                  -
  nUS:X  a  [        S5      eU R                  R                  U0 5      n[        U R                  5       HA  nUR                  U5      n	U	c    O,XH   U	R                  UR                  5      -
  U-  XHS-   '   MC     UR                  5        V
Vs0 s H   u  pXR                  U R                   5      _M"     snn
U R                  U'   GM     U R                  U l        g s  snn
f )Nc              3   8   #    U  H  oR                   v   M     g 7fNr+   .0outputs     r   	<genexpr>)TaylorSeerState.update.<locals>.<genexpr>   s     "FgF<<g   r   c              3   8   #    U  H  oR                   v   M     g 7frD   )shaperF   s     r   rI   rJ      s     (LG&GrK   z0Delta step cannot be zero for TaylorSeer update.r   )tupler6   r:   r3   r9   	enumerater7   r;   
ValueErrorr8   getranger   tor+   itemsr   )r   rA   ifeaturesnew_factorsis_first_update
delta_stepprev_factorsjprevorderfactors               r   updateTaylorSeerState.update   s`    #"Fg"FFaj''#((LG(L#LD (1898}"&"7"74"?&!%!2!2T5J5J!JJ!Q()[\\ $(#6#6#:#:1b#AL"4>>2+//2<!.9ntwwx~~?V.VZd-dE*	 3 VaUfUfUh*UhMEE99T%>%>??Uh*##A&  2& !% 1 1	*s   3'Fc           	      p   U R                   c  [        S5      eU R                  U R                   -
  n/ nU R                  (       a  U R                  c  [        S5      e[        [        U R                  5      5       HL  nUR                  [        R                  " U R                  U   U R                  U   U R                  S95        MN     U$ U R                  (       d  [        S5      e[        U R                  5      n[        U R                  S   5      n[        U5       H  nU R                  U   nU R                  U   n[        R                  " US   US9n[        U5       H8  n	X-  [        R                  " U	5      -  n
Xy   nXR!                  U5      U
-  -   nM:     UR                  U5        M     U$ )Nz3Cannot predict without prior initialization/update.z*Inactive shapes not set during prediction.)r+   r:   z'Taylor factors empty during prediction.r   rE   )r7   rP   r;   r3   r9   rR   lenr6   appendr)   zerosr:   r8   
zeros_likemath	factorialrS   )r   step_offsetrA   rU   num_outputs
num_ordersoutput_dtyper8   rH   r]   coeffr^   s               r   predictTaylorSeerState.predict   s     (RSS''$*?*??##+ !MNN3t1123KK,,Q/"003#{{ 4,  && !JKKd112KT0034J;'#11!4!%!4!4Q!7)).*;<P":.E(/4>>%3HHE+2F#ii&=&EEF / v& ( r    )	r;   r:   r9   r3   r7   r   r6   r8   r   )r   N)r"   r#   r$   r%   r)   r*   r+   r'   r.   r<   r?   rN   Tensorr_   compilerdisabler,   rm   r/   r!   r    r   r1   r1   i   s     49>>!	$#kkD0$ $ 	$"2u||S()2 
2> ^^ ell+    r    r1   c                   J  ^  \ rS rSrSr SS\S\S\R                  S\S\S-  4
U 4S	 jjjr	S
\R                  R                  4S jrS
\R                  R                  SS4S jr\R                  R                  S\4S j5       rS
\R                  R                  4S jrSrU =r$ )TaylorSeerCacheHook   TNr   r   r   state_managerr   c                 ^   > [         TU ]  5         Xl        X l        XPl        X0l        X@l        g rD   )superr<   r   r   r   r   ru   )r   r   r   r   ru   r   	__class__s         r   r<   TaylorSeerCacheHook.__init__   s.     	,)B&(@%$8!*r    modulec                     U$ rD   r!   r   rz   s     r   initialize_hook#TaylorSeerCacheHook.initialize_hook   s    r    r   c                 8    U R                   R                  5         g)z$
Reset state between sampling runs.
N)ru   r?   r|   s     r   reset_stateTaylorSeerCacheHook.reset_state   s     	  "r    c                 J   U R                   R                  5       nU=R                  S-  sl        UR                  nX R                  :  nX R                  -
  S-
  U R                  -  S:H  nU R
                  S L=(       a    X R
                  :  nU=(       d    U=(       d    UnXa4$ Nr   r   )ru   	get_stater;   r   r   r   )r   stater;   is_warmup_phaseis_compute_intervalis_cooldown_phaseshould_computes          r   _measure_should_compute+TaylorSeerCacheHook._measure_should_compute   s    !%!3!3!=!=!?a))&)G)GG+.L.LLqPTXTgTggkll 99Ew,ZwZwJw(T,?TCT$$r    c                 8   U R                  5       u  pEU(       aS  U R                  R                  " U0 UD6n[        U[        R
                  5      (       a  U4OUnUR                  U5        U$ UR                  5       n[        U5      S:X  a  US   $ [        U5      $ r   )
r   fn_reforiginal_forward
isinstancer)   ro   r_   rm   rb   rN   )	r   rz   argskwargsr   r   rA   wrapped_outputsoutputs_lists	            r   new_forwardTaylorSeerCacheHook.new_forward   s     $ < < >kk22DCFCG,6w,M,MwjSZOLL)N}}"%l"3q"8|AQeL>QQr    )r   r   r   ru   r   rD   )r"   r#   r$   r%   _is_statefulr'   r)   r+   r	   r<   nnModuler}   r   rp   rq   r.   r   r   r/   __classcell__)rx   s   @r   rs   rs      s    L 04++ $'+ $kk	+
 $+ #&*+ +ehhoo #%((// #d # ^^% % %	R%((// 	R 	Rr    rs   configr   c                     U R                   b  U R                   OSnU R                  b  U R                  OSnU=(       d    / U=(       d    / 4$ )zN
Resolve effective inactive and active pattern lists from config + templates.
N)r   r   )r   inactive_patternsactive_patternss      r   _resolve_patternsr      sM    
 <B;Z;Z;f77lp282J2J2Vf..\`O"O$9r99r    rz   c                   ^ [        U5      u  p#U=(       d    [        nUR                  (       aX  [        R	                  S5        [
        n[        nUR                  (       d  UR                  (       a  [        R                  S5        U R                  5        HK  u  mn[        U4S jU 5       5      n[        U4S jU 5       5      nU(       d	  U(       d  M@  [        UUUS9  MM     g)ae  
Applies the TaylorSeer cache to a given pipeline (typically the transformer / UNet).

This function hooks selected modules in the model to enable caching or skipping based on the provided
configuration, reducing redundant computations in diffusion denoising loops.

Args:
    module (torch.nn.Module): The model subtree to apply the hooks to.
    config (TaylorSeerCacheConfig): Configuration for the cache.

Example:
```python
>>> import torch
>>> from diffusers import FluxPipeline, TaylorSeerCacheConfig

>>> pipe = FluxPipeline.from_pretrained(
...     "black-forest-labs/FLUX.1-dev",
...     torch_dtype=torch.bfloat16,
... )
>>> pipe.to("cuda")

>>> config = TaylorSeerCacheConfig(
...     cache_interval=5,
...     max_order=1,
...     disable_cache_before_step=3,
...     taylor_factors_dtype=torch.float32,
... )
>>> pipe.transformer.enable_cache(config)
```
z(Using TaylorSeer Lite variant for cache.z"Lite mode overrides user patterns.c              3   R   >#    U  H  n[         R                  " UT5      v   M     g 7frD   re	fullmatchrG   patternnames     r   rI   )apply_taylorseer_cache.<locals>.<genexpr>-  s!     \J[wr||GT::J[   $'c              3   R   >#    U  H  n[         R                  " UT5      v   M     g 7frD   r   r   s     r   rI   r   .  s     XWR\\'488r   )rz   r   r3   N)r   _TRANSFORMER_BLOCK_IDENTIFIERSr   loggerinfo_PROJ_OUT_IDENTIFIERS_BLOCK_IDENTIFIERSr   r   warningnamed_modulesany_apply_taylorseer_cache_hook)rz   r   r   r   	submodulematches_inactivematches_activer   s          @r   apply_taylorseer_cacher     s    > *;6)B&%G)GO>?/.**f.F.FNN?@!//1i\J[\\XXX N$(	
 2r    r3   c                    [        [        UR                  UR                  US.S9n[        R
                  " U 5      n[        UR                  UR                  UR                  UR                  US9nUR                  U[        5        g)z
Registers the TaylorSeer hook on the specified nn.Module.

Args:
    name: Name of the module.
    module: The nn.Module to be hooked.
    config: Cache configuration.
    is_inactive: Whether this module should operate in "inactive" mode.
)r   r   r3   )init_kwargs)r   r   r   r   ru   N)r	   r1   r   r   r   check_if_exists_or_initializers   r   r   r   register_hook_TAYLORSEER_CACHE_HOOK)rz   r   r3   ru   registryhooks         r   r   r   8  s     !$*$?$?))&
M 99&AH,,"("B"B#88!'!@!@#D 4!78r    )!rf   r   dataclassesr   r)   torch.nnr   utilsr   hooksr   r   r	   
get_loggerr"   r   r   $_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS%_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERSr   r   r   r   r1   rs   rN   r,   r-   r   r   r   r.   r   r!   r    r   <module>r      s     	 !    8 8 
		H	%+ ( $
 )P %!EHm!m 3 '  L
 L
 L
^Y Yx/R) /Rd:3 :d3ic>R8S :3
588?? 3
<Q 3
l!9II!9!!9 !9r    