
    3j1                    b   S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	  SSK
Jr  SS	KJrJr  \(       a  SS
KJr  SSKJr  0 r\\" SS9 " S S5      5       5       r " S S5      r\" S5      r S         SS jjr        SS jrSSS jjr\R4                  " 5       rS S jrSSS.S jjrg)!z
Contains the logic for automatic additional output capture with our forward decorators.
This mostly describe the hooks used and the logic to make capture thread/context safe.
    )annotationsN)
ContextVar)	dataclasswraps)TYPE_CHECKING   )is_torchdynamo_compilingrequires)nn   PreTrainedModel)torch)backendsc                  \    \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   SrS	\S'   Sr	S\S'   Sr
g)OutputRecorder'   ar  
Configuration for recording outputs from a model via hooks.

Attributes:
    target_class (Type): The class (e.g., nn.Module) to which the hook will be attached.
    index (Optional[int]): If the output is a tuple/list, optionally record only at a specific index.
    layer_name (Optional[str]): Name of the submodule to target (if needed), e.g., "transformer.layer.3.attn".
    class_name (Optional[str]): Name of the class to which the hook will be attached. Could be the suffix of class name in some cases.
    capture_initial_hidden_state  (bool): Whether to prepend the first module's input as the initial hidden state.
ztype[nn.Module]target_classr   intindexN
str | None
layer_name
class_nameTboolcapture_initial_hidden_state )__name__
__module____qualname____firstlineno____doc____annotations__r   r   r   r   __static_attributes__r       ]/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/utils/output_capturing.pyr   r   '   s7    	 "!E3N!J
!!J
!)- $-r%   r   c                  0    \ rS rSrSrS rS rS rS rSr	g)	CompileableContextVar<   a  
Convenience wrapper around a ContextVar for usage with `torch.compile`.
This behaves exactly as a `ContextVar`, except when compilation is triggered in which case it behaves as a simple
global variable. This is useful as `torch.compile` cannot trace the `get` method of `ContextVar`. This however means
that the access to the underlying variable is not thread-safe when compilation is triggered.
c                >    [        US S9U l        S U l        SU l        g )N)defaultF)r   context_var
global_var	compiling)selfnames     r&   __init__CompileableContextVar.__init__D   s    %dD9r%   c                p    U R                   (       a  U R                  $ U R                  R                  5       $ N)r.   r-   r,   get)r/   s    r&   r5   CompileableContextVar.getI   s(    >>??"##''))r%   c                r    [        5       (       a  Xl        SU l        g U R                  R	                  U5      $ )NT)r
   r-   r.   r,   set)r/   values     r&   r8   CompileableContextVar.setP   s1    #%%#O!DN##''..r%   c                    U R                   (       d  Uc  S U l        SU l         g U R                  R                  U5        g )NF)r.   r-   r,   reset)r/   tokens     r&   r<   CompileableContextVar.resetX   s/    >>U]"DO"DN""5)r%   )r.   r,   r-   N)
r   r   r    r!   r"   r1   r5   r8   r<   r$   r   r%   r&   r(   r(   <   s    
*/*r%   r(   output_collectorTc                <   ^^^ UUU4S jnU R                  U5        g)zaInstall the forward hook needed to capture the output described by `key` and `index` in `module`.c                Z  > [         R                  5       nUb  TUR                  5       ;  a  g T(       a/  TS:X  a)  [        UT   5      S:X  a  UT   R	                  US   5        [        U[        5      (       d  UT   R	                  U5        g UT   b  UT   R	                  UT   5        g g )Nhidden_statesr   )_active_collectorr5   keyslenappend
isinstancetuple)moduleargsoutputcollected_outputsr   r   keys       r&   output_capturing_hook;install_output_capuring_hook.<locals>.output_capturing_hooki   s    -113$3D3I3I3K(K'C?,BsK\]`KaGbfgGgc"))$q'2&%((c"))&1E]&c"))&-8 'r%   N)register_forward_hook)rI   rM   r   r   rN   s    ``` r&   install_output_capuring_hookrQ   d   s    
9   !67r%   c                   SSK Jn  U R                  5        H6  u  pE[        XS5      (       d  [	        XQ SU 3U5        M(  [        XQ SU 3S9  M8     U H  u  pgUR                  b  [        XR                  5      (       d1  UR                  c  M;  UR                  UR                  5      (       d  M]  UR                  b  UR                  U;  a  M|  [        XUR                  UR                  5        M     g)a  
Recursively install all output capturing hooks on all submodules of `parent_module`.
Note that we need to use this recursive approach instead of simply iterating over all modules, because we want
to respect the `capture_tasks` of all individual submodels (`PreTrainedModel` instances) in the graph. That is, once
we reach a submodel in the graph, its children should use this submodel's `capture_tasks`, but other parts of the graph
should not.
r   r   .)prefixN)modeling_utilsr   named_childrenrG   recursively_install_hooks"install_all_output_capturing_hooksr   r   endswithr   rQ   r   r   )parent_modulemodule_namecapture_tasksr   r0   rI   rM   specss           r&   rW   rW   z   s     1 &446&22%fQtf.E}U /vQtf>UV 7 $
*z-I[I[/\/\([-A-A%BRBR-S-S+0@0@0S(U[[%JlJlm $r%   c                   [         R                  [        U R                  5      5      =(       d    0 n/ nUR	                  5        H  u  pE[        U[        5      (       d  U/nU Hp  n[        U[        5      (       dF  SU;   a  SOSn[        U[        5      (       d  SOUn[        U[        5      (       d  UOSn	[        XUS9nUR                  XF45        Mr     M     Ub  UOSn[        XU5        [        U SS5        g)	z
Install the output recording hooks on all the modules in `model`. This will take care of correctly dispatching
the `_can_record_outputs` property of each individual submodels in case of composite models.
rB   r   r	   N)r   r   r    !_output_capturing_hooks_installedT)_CAN_RECORD_REGISTRYr5   str	__class__itemsrG   listr   rF   rW   setattr)
modelrT   capture_flagsr\   rM   layer_specsr]   r   r   r   s
             r&   rX   rX      s     ),,S-ABHbMM)//1+t,,&-K Ee^44,3)3E3)?)?TU
,6uc,B,Bu&LZde  #. ! 2 )VrFe];E6=r%   c                    [        U SS5      (       a  g[           [        U SS5      (       a
   SSS5        g[        U 5        SSS5        g! , (       d  f       g= f)z
Check if the model already has output capturing hooks installed, and install them if it is not already the
case.
Note that this is thread-safe, in case 2 (or more) threads want to install them concurrently.
r`   FN)getattr_hook_installation_lockrX   )rg   s    r&   maybe_install_capturing_hooksrm      sM     u95AA	  5=uEE	 
!	  	+51 
!	 	 s   AA
A)tie_last_hidden_statesc               *   ^ U4S jnU b  U" U 5      $ U$ )a  
Decorator to intercept specific layer outputs through hooks. The hooks are installed only once and lazily,
the first time output capture is requested with the `output_xxx` kwargs/config.
The implementation is fully context/thread safe, except when using `torch.compile`, as dynamo is unable to trace
through `ContextVar` methods.

Args:
    tie_last_hidden_states (`bool`, *optional*, defaults to `True`):
        Whether to overwrite `out.hidden_states[-1]` with the `out.last_hidden_state`.
        This is true for all language models and should be toggled off only if
        `out.hidden_states[-1]` has to be the hidden state before last layer norm, which
        is needed for some vision models (e.g. CLIP, SigLIP)
c                4   >^  [        T 5      U U4S j5       nU$ )Nc                  > UR                  S[        U R                  SS5      5      n[        R	                  [        U R                  5      5      =(       d    0 nU Vs0 s H4  nSU 3UR	                  SU 3[        U R                  SU 3S5      5      _M6     nnSU;   a*  UR	                  S[        U R                  SS5      5      US'   SU;   a*  UR	                  S[        U R                  SS5      5      US	'   UR                  5        VVs0 s H!  u  pWU(       d  M  UR                  SS
5      / _M#     nnn[        U5      S:  a  [        U 5        [        R                  U5      n	 T" U /UQ70 UD6n
[        R                  U	5        U GH  nUS:X  a  T(       d  Oo[        U
S5      (       a'  X   S S X'   X   R                  U
R                   5        O7[        U
S5      (       a&  X   S S X'   X   R                  U
R"                  5        [%        X   5      X'   M  US:X  aj  ['        XK   [(        5      (       aA  [        XK   5      S:X  a0  [%        X   SS S2   5      X'   [%        X   SS S2   5      U
SU-   '   M  [%        X   5      X'   GM  [%        X   5      X'   GM     USL a  U
R+                  5       n
U
$ s  snf s  snnf ! [        R                  U	5        f = f)Nreturn_dictToutput_Fcross_attentionsoutput_attentionsoutput_cross_attentionsmask_decoder_attentionsoutput_mask_decoder_attentionsr_   r   rB   vision_hidden_stateslast_hidden_state
attentionsr   r	   cross_)poprk   configra   r5   rb   rc   rd   replacerE   rm   rC   r8   r<   hasattrrF   ry   r{   rH   rG   re   to_tuple)r/   rJ   kwargsrr   capturable_flagskrecordable_keysvrL   output_tokenoutputsrM   funcrn   s               r&   wrapper4capture_outputs.<locals>.wrapped_fn.<locals>.wrapper   s    !**]GDKKX\4]^K  477DNN8KLRPR *)A !vzzGA3-PWXYWZm]b9cdd)  
 "%55=CZZ'>QSX)Y> 9: ),<<DJJJ'>QSX)YE @A KZJ_J_Ja gJa$!ef!=9b!92!=Ja g$%)-d3,001BCL6t5d5f5 "''5 )/)1 *@AA1B1G1L).).55g6R6RS *=>>1B1G1L).).55g6O6OP#():)?#@GLL(!"2"7>>3GWG\C]abCb',->-CADqD-I'J278I8NqtRSt8T2U3/',->-C'D#():)?#@GL) ), e#!**,No !h "''5s   ;J9J>J>)K Kr   )r   r   rn   s   ` r&   
wrapped_fn#capture_outputs.<locals>.wrapped_fn   s!    	t=	 
=	~ r%   r   )r   rn   r   s    ` r&   capture_outputsr      s#    AF $r%   )T)
rI   	nn.ModulerM   rb   r   r   r   r   returnNone)rZ   r   r[   rb   r\   z list[tuple[str, OutputRecorder]]r   r   r4   )rg   r   rT   r   r   r   )rg   r   r   r   )r"   
__future__r   	threadingcontextvarsr   dataclassesr   	functoolsr   typingr   import_utilsr
   r   r   r   rU   r   ra   r   r(   rC   rQ   rW   rX   Lockrl   rm   r   r   r%   r&   <module>r      s   
 #  " !    < 0   	:. .  .&!* !*J **<=  SW888(+8KO8	8,nn+.n?_n	n@>: $..* 2&T Tr%   