
    3j.-                         S SK Jr  S SKrS SKJr  SSKJr  SSKJrJ	r	J
r
Jr  SSKJr  SSKJr  SSKJrJrJrJr  \R*                  " \5      r " S	 S
\R0                  5      r\ " S S5      5       r\ " S S5      5       r\ " S S5      5       rg)    )partialN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   0   ^  \ rS rSrSrSrU 4S jrSrU =r$ )GradientCheckpointingLayer"   a  Base class for layers with gradient checkpointing.

This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
(`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

Important:

    When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
    must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

    Example:

        ```python
        >>> # Correct - hidden_states passed as positional arg
        >>> out = self.layer(hidden_states, attention_mask=attention_mask)

        >>> # Incorrect - hidden_states passed as keyword arg
        >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
        ```
Fc                   > U R                   (       a  U R                  (       a  SnU R                  R                  nSU S3nSU;   a  US   (       a  SUS'   US-  nSnSU;   a  US   b  S US'   US-  nSnS	U;   a  US	   b  S US	'   US
-  nSnSU;   a  US   b  S US'   US-  nSnU(       a)  UR	                  S5      S-   n[
        R                  U5        U R                  " [        [        TU ](  40 UD6/UQ76 $ [        TU ](  " U0 UD6$ )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_once_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         V/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/modeling_layers.pyr#   #GradientCheckpointingLayer.__call__;   sK   &&4==G00JOPZ|[deGf$)<&+{#00  6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00 !..-3##G,44WUW=M5XQW5X`[_``w000     )	r   
__module____qualname____firstlineno____doc__r   r#   __static_attributes____classcell__r   s   @r*   r   r   "   s    , #"1 "1r,   r   c                     ^  \ rS rSrSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\R                  S-  S
\R                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ ) GenericForSequenceClassification`   modelc                 4  > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        R                  " UR                  5       R                  U R                  SS9U l        U R                  5         g )NF)bias)r"   __init__
num_labelssetattrbase_model_prefixr
   from_confignnLinearget_text_confighidden_sizescore	post_initr$   configr   s     r*   r;   )GenericForSequenceClassification.__init__d   sl      ++,,i.C.CF.KLYYv557CCT__[`a
 	r,   N	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r&   returnc           	         [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      nUb  UR                  S   nOUR                  S   nU R
                  R                  5       R                  c  US:w  a  [        S5      eU R
                  R                  5       R                  c  SnOUb  XR
                  R                  5       R                  :g  R                  UR                  [        R                  5      n[        R                  " UR                  S   UR                  [        R                  S9nX-  R                  S5      nO.Sn[        R!                  U R"                  R$                   S35        U[        R                  " XR                  S9U4   nS nUb  U R'                  XUU R
                  S	9n[)        UUU	R*                  U	R,                  U	R.                  S
9$ )NrJ   rK   r   rL   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rR   )logitsrM   pooled_logitsrG   )lossrT   r   hidden_states
attentions)getattrr>   last_hidden_staterD   shaperG   rB   pad_token_id
ValueErrortorR   torchint32arangeargmaxr   r    r   r   loss_functionr   r   rW   rX   )r$   rI   rJ   rK   r   rL   rM   r   r&   transformer_outputsrW   rT   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrU   rV   s                     r*   forward(GenericForSequenceClassification.forwardn   s    8?tE[E[7\8
)%+'8
 8
 ,==M* "+J&,,Q/J;;&&(55=*PQ/\]];;&&(55=!#"%)D)D)F)S)SSWWX^XeXeglgrgrsL!LL)<V]]Z_ZeZefM"/">!F!Fr!J!#>>**+ ,Z Z
 u||J}}MOaab%%VR_hlhshs%tD/ /??-;;*55
 	
r,   )r<   rD   NNNNNNN)r   r.   r/   r0   r>   r;   r   r   r_   
LongTensorTensorr   FloatTensorboolr   r   r   ri   r2   r3   r4   s   @r*   r6   r6   `   s      .2.204(,26*.!%8
##d*8
 t+8
 &&-	8

 8
 ((4/8
   4'8
 $;8
 +,8
 
*8
  8
r,   r6   c                   4  ^  \ rS rSrSrU 4S jrS rS r\\	       SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )GenericForQuestionAnswering   r8   c                    > [         TU ]  U5        [        X R                  [        R
                  " U5      5        [        R                  " UR                  S5      U l	        U R                  5         g )N   )r"   r;   r=   r>   r
   r?   r@   rA   rC   
qa_outputsrE   rF   s     r*   r;   $GenericForQuestionAnswering.__init__   sO     ,,i.C.CF.KL))F$6$6: 	r,   c                 @    [        X R                  5      R                  $ NrY   r>   embed_tokens)r$   s    r*   get_input_embeddings0GenericForQuestionAnswering.get_input_embeddings   s    t334AAAr,   c                 8    U[        X R                  5      l        g rx   ry   )r$   values     r*   set_input_embeddings0GenericForQuestionAnswering.set_input_embeddings   s    =B,,-:r,   NrI   rJ   rK   r   rL   start_positionsend_positionsr&   rN   c                    [        X R                  5      " U4UUUUS.UD6n	U	R                  n
U R                  U
5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  U R                  " XXg40 UD6n[        UUUU	R                  U	R                  S9$ )N)rJ   rK   r   rL   r   rQ   )dim)rV   start_logits
end_logitsrW   rX   )rY   r>   rZ   ru   splitsqueeze
contiguousrc   r   rW   rX   )r$   rI   rJ   rK   r   rL   r   r   r&   outputssequence_outputrT   r   r   rV   s                  r*   ri   #GenericForQuestionAnswering.forward   s     ,349O9O+P,
)%+',
 ,
 "331#)<<r<#: #++B/::<''+668
&=+D%%libhiD+%!!//))
 	
r,   )ru   rk   )r   r.   r/   r0   r>   r;   r{   r   r   r   r_   rl   rm   r   rn   r   r   r   ri   r2   r3   r4   s   @r*   rq   rq      s    BC  .2.204(,263715%
##d*%
 t+%
 &&-	%

 %
 ((4/%
 ))D0%
 ''$.%
 +,%
 
&%
  %
r,   rq   c                     ^  \ rS rSrSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\R                  S-  S
\R                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ )GenericForTokenClassification   r8   c                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        USS 5      b  UR                  nO[        USS 5      b  UR                  nOSn[        R                  " U5      U l        [        R                  " UR                  5       R                  UR                  5      U l        U R#                  5         g )Nclassifier_dropouthidden_dropoutg?)r"   r;   r<   r=   r>   r
   r?   rY   r   r   r@   DropoutdropoutrA   rB   rC   rD   rE   )r$   rG   r   r   s      r*   r;   &GenericForTokenClassification.__init__   s      ++,,i.C.CF.KL6/6B!'!:!:V-t4@!'!6!6!$zz"45YYv557CCVEVEVW
 	r,   NrI   rJ   rK   r   rL   rM   r   r&   rN   c           	      $   [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      n
U R	                  U
5      nS nUb  U R                  XU R                  5      n[        UUU	R                  U	R                  S9$ )NrP   )rV   rT   rW   rX   )
rY   r>   rZ   r   rD   rc   rG   r	   rW   rX   )r$   rI   rJ   rK   r   rL   rM   r   r&   r   r   rT   rV   s                r*   ri   %GenericForTokenClassification.forward   s     ,349O9O+P,
)%+',
 ,
 "33,,7O,%%fdkkBD$!//))	
 	
r,   )r   r<   rD   rk   )r   r.   r/   r0   r>   r;   r   r   r_   rl   rm   r   rn   ro   r   r   r	   ri   r2   r3   r4   s   @r*   r   r      s    "  .2.204(,26*.!%!
##d*!
 t+!
 &&-	!

 !
 ((4/!
   4'!
 $;!
 +,!
 
!
  !
r,   r   )	functoolsr   r_   torch.nnr@   cache_utilsr   modeling_outputsr   r   r   r	   models.autor
   processing_utilsr   utilsr   r   r   r   
get_loggerr   r   Moduler   r6   rq   r   r-   r,   r*   <module>r      s         # $ P P 
		H	%;1 ;1| G
 G
 G
T 9
 9
 9
x 7
 7
 7
r,   