
    3j?|                        S r SSKJr  SSKJr  SSKrSSKJr  SSKJr  SSK	J
r  SS	KJr  SS
KJr  SSKJr  SSKJrJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJrJ r J!r!J"r"  SSK#J$r$  SSK%J&r&  SSK'J(r(  \!RR                  " \*5      r+ " S S\RX                  5      r- S?S\RX                  S\R\                  S\R\                  S\R\                  S\R\                  S-  S\/S\/4S jjr0 " S S \RX                  5      r1 " S! S"\RX                  5      r2 " S# S$\RX                  5      r3 " S% S&\RX                  5      r4 " S' S(\RX                  5      r5 " S) S*\5      r6 " S+ S,\RX                  5      r7\ " S- S.\5      5       r8\ " S/ S0\85      5       r9 " S1 S2\RX                  5      r: " S3 S4\RX                  5      r;\ " S5 S6\85      5       r<\" S7S89\ " S9 S:\5      5       5       r=\" S;S89 " S< S=\85      5       r>/ S>Qr?g)@zPyTorch Splinter model.    )Callable)	dataclassN)nn)CrossEntropyLoss   )initialization)ACT2FN)create_bidirectional_mask)GradientCheckpointingLayer)BaseModelOutputModelOutputQuestionAnsweringModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringcan_return_tupleloggingtorch_compilable_check)merge_with_config_defaults)capture_outputs   )SplinterConfigc                      ^  \ rS rSrSrU 4S jr    SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	4
S
 jjr
SrU =r$ )SplinterEmbeddings(   zGConstruct the embeddings from word, position and token_type embeddings.c                 v  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  g )N)padding_idxepsposition_idsr   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     h/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/splinter/modeling_splinter.pyr(   SplinterEmbeddings.__init__+   s    !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
    N	input_idstoken_type_idsr#   inputs_embedsreturnc                    Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  U5      nXG-   nU R                  U5      n	X-  nU R                  U5      nU R                  U5      nU$ )Nr%   r   dtypedevice)sizer#   r8   zeroslongrI   r-   r1   r/   r2   r6   )
r<   rB   rC   r#   rD   input_shape
seq_lengthr1   
embeddingsr/   s
             r?   forwardSplinterEmbeddings.forward9   s      #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M $ : :> J":
"66|D)
^^J/
\\*-
rA   )r2   r6   r/   r1   r-   )NNNN)__name__
__module____qualname____firstlineno____doc__r(   r8   
LongTensorFloatTensortuplerP   __static_attributes____classcell__r>   s   @r?   r   r   (   s    Q
  .2260426##d* ((4/ &&-	
 ((4/ 
 rA   r   modulequerykeyvalueattention_maskscalingr6   c                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )N   r   r%   )dimrH   )ptrainingr   )r8   matmul	transposer   
functionalsoftmaxfloat32torH   r6   rg   
contiguous)
r]   r^   r_   r`   ra   rb   r6   kwargsattn_weightsattn_outputs
             r?   eager_attention_forwardrr   [   s     <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$rA   c                      ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
\R                  \R                  S-  4   4S jjrS	rU =r$ )SplinterSelfAttentionr   c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r'   r(   r+   num_attention_headshasattr
ValueErrorr=   intattention_head_sizeall_head_sizer   Linearr^   r_   r`   r4   attention_probs_dropout_probr6   attention_dropoutrb   r;   s     r?   r(   SplinterSelfAttention.__init__s   sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rA   Nhidden_statesra   ro   rE   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
X4$ )Nr%   r   rd           )r6   rb   )shaper}   r^   viewri   r_   r`   r   get_interfacer=   _attn_implementationrr   rg   r   rb   reshapern   )r<   r   ra   ro   rM   hidden_shapequery_states
key_statesvalue_statesattention_interfacerq   rp   s               r?   rP   SplinterSelfAttention.forward   s8    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFH((rA   )
r~   r   r}   r=   r6   r_   ry   r^   rb   r`   N)rR   rS   rT   rU   r(   r8   TensorrX   r   r   rY   rP   rZ   r[   r\   s   @r?   rt   rt   r   si    60 48)||) ))D0) +,	)
 
u||U\\D00	1) )rA   rt   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr!   )r'   r(   r   r   r+   denser2   r3   r4   r5   r6   r;   s     r?   r(   SplinterSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rA   r   input_tensorrE   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r6   r2   r<   r   r   s      r?   rP   SplinterSelfOutput.forward   5    

=1]3}'CDrA   r2   r   r6   
rR   rS   rT   rU   r(   r8   r   rP   rZ   r[   r\   s   @r?   r   r      6    >U\\  RWR^R^  rA   r   c            	          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	rU =r$ )SplinterAttention   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r'   r(   rt   r<   r   outputr;   s     r?   r(   SplinterAttention.__init__   s&    )&1	(0rA   Nr   ra   ro   rE   c                 Z    UnU R                   " U4SU0UD6u  pU R                  X5      nU$ Nra   )r<   r   )r<   r   ra   ro   residual_s         r?   rP   SplinterAttention.forward   sE     !99
)
 

 M<rA   )r   r<   r   )rR   rS   rT   rU   r(   r8   r   rX   r   r   rP   rZ   r[   r\   s   @r?   r   r      sV    1 48|| ))D0 +,	
 
 rA   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )SplinterIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r'   r(   r   r   r+   intermediate_sizer   
isinstance
hidden_actstrr	   intermediate_act_fnr;   s     r?   r(   SplinterIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$rA   r   rE   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r<   r   s     r?   rP   SplinterIntermediate.forward   s&    

=100?rA   r   r   r\   s   @r?   r   r      s(    9U\\ ell  rA   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r'   r(   r   r   r   r+   r   r2   r3   r4   r5   r6   r;   s     r?   r(   SplinterOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rA   r   r   rE   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r?   rP   SplinterOutput.forward   r   rA   r   r   r\   s   @r?   r   r      r   rA   r   c            	          ^  \ rS rSrU 4S jr SS\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	 rS
rU =r$ )SplinterLayer   c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r'   r(   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r;   s     r?   r(   SplinterLayer.__init__   sI    '-'E'E$*6208$V,rA   Nr   ra   ro   rE   c                     U R                   " U4SU0UD6n[        U R                  U R                  U R                  U5      nU$ r   )r   r   feed_forward_chunkr   r   )r<   r   ra   ro   s       r?   rP   SplinterLayer.forward   sW     
)
 
 2##T%A%A4CSCSUb
 rA   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )r<   attention_outputintermediate_outputlayer_outputs       r?   r    SplinterLayer.feed_forward_chunk	  s)    "//0@A{{#6IrA   )r   r   r   r   r   r   )rR   rS   rT   rU   r(   r8   r   rX   r   r   rP   r   rZ   r[   r\   s   @r?   r   r      s[    - 48|| ))D0 +,	
 
$ rA   r   c            	       |   ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
4S jjrS	rU =r$ )SplinterEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r'   r(   r=   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r<   r=   ir>   s      r?   r(   SplinterEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&Nr   ra   ro   rE   c                 N    U R                    H  nU" UU40 UD6nM     [        US9$ )Nlast_hidden_state)r   r   )r<   r   ra   ro   layer_modules        r?   rP   SplinterEncoder.forward  s>     !JJL( M ' +
 	
rA   )r=   r   r   r   )rR   rS   rT   rU   r(   r8   r   rX   r   r   r   rP   rZ   r[   r\   s   @r?   r   r     sR    , 48
||
 ))D0
 +,	

 

 
rA   r   c                   F   ^  \ rS rSr% \\S'   SrSr\\	S.r
U 4S jrSrU =r$ )SplinterPreTrainedModeli)  r=   splinterT)r   
attentionsc                   > [         TU ]  U5        [        U[        5      (       a\  [        R
                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        g g )Nr%   r$   )r'   _init_weightsr   r   initcopy_r#   r8   r9   r   r:   )r<   r]   r>   s     r?   r   %SplinterPreTrainedModel._init_weights3  s^    f%f011JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 2rA    )rR   rS   rT   rU   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   rt   _can_record_outputsr   rZ   r[   r\   s   @r?   r   r   )  s/    "&*#&+
i irA   r   c                     ^  \ rS rSrSrU 4S jrS rS r\\	\
     SS\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       5       rSrU =r$ )SplinterModeli9  a"  
The model is an encoder (with only self-attention) following the architecture described in [Attention is all you
need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U R                  5         g r   )r'   r(   r=   r   rO   r   encoder	post_initr;   s     r?   r(   SplinterModel.__init__A  s9     ,V4&v. 	rA   c                 .    U R                   R                  $ r   rO   r-   )r<   s    r?   get_input_embeddings"SplinterModel.get_input_embeddingsK  s    ...rA   c                 $    XR                   l        g r   r   )r<   r`   s     r?   set_input_embeddings"SplinterModel.set_input_embeddingsN  s    */'rA   NrB   ra   rC   r#   rD   ro   rE   c                    Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[        S5      eUu  pUb  UR                  OUR                  n
Uc  [        R
                  " X4U
S9nUc$  [        R                  " U[        R                  U
S9nU R                  UUUUS9n[        U R                  UUS9nU R                  " U4S	U0UD6nUS
   n[        US9$ )a  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
NzDYou cannot specify both input_ids and inputs_embeds at the same timer%   z5You have to specify either input_ids or inputs_embeds)rI   rG   )rB   r#   rC   rD   )r=   rD   ra   ra   r   r   )r{   %warn_if_padding_and_no_attention_maskrJ   rI   r8   onesrK   rL   rO   r
   r=   r   r   )r<   rB   ra   rC   r#   rD   ro   rM   
batch_sizerN   rI   embedding_outputencoder_outputssequence_outputs                 r?   rP   SplinterModel.forwardQ  s1   6  ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZ*)A6RN!"[[EJJvVN??%)'	 + 
 3;;*)
 ,,
)
 

 *!,-
 	
rA   )r=   rO   r   )NNNNN)rR   rS   rT   rU   rV   r(   r   r   r   r   r   r8   r   r   r   rY   r   rP   rZ   r[   r\   s   @r?   r   r   9  s    /0   *..2.2,0-1@
<<$&@
 t+@
 t+	@

 llT)@
 ||d*@
 +,@
 
	 @
    @
rA   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )SplinterFullyConnectedLayeri  c                    > [         TU ]  5         Xl        X l        [        R
                  " U R                  U R                  5      U l        [        U   U l        [        R                  " U R                  5      U l	        g r   )
r'   r(   	input_dim
output_dimr   r   r   r	   act_fnr2   )r<   r  r	  r   r>   s       r?   r(   $SplinterFullyConnectedLayer.__init__  sR    "$YYt~~t?
Z(doo6rA   inputsrE   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r
  r2   )r<   r  r   s      r?   rP   #SplinterFullyConnectedLayer.forward  s2    

6*M2}5rA   )r2   r
  r   r  r	  )gelur   r\   s   @r?   r  r    s(    7ell u||  rA   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )QuestionAwareSpanSelectionHeadi  z^
Implementation of Question-Aware Span Selection (QASS) head, described in Splinter's paper:

c                   > [         TU ]  5         [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        R                  " UR                  UR                  SS9U l
        [        R                  " UR                  UR                  SS9U l        g )NF)bias)r'   r(   r  r+   query_start_transformquery_end_transformstart_transformend_transformr   r   start_classifierend_classifierr;   s     r?   r(   'QuestionAwareSpanSelectionHead.__init__  s    %@ASASU[UgUg%h"#>v?Q?QSYSeSe#f :6;M;MvOaOab89K9KVM_M_` "		&*<*<f>P>PW\ ] ii(:(:F<N<NUZ[rA   c                    UR                  5       u    p4UR                  S5      R                  SSU5      n[        R                  " USUS9nU R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                  U5      nU	R                  SSS5      n	[        R                  " X5      nU R                  U5      nU
R                  SSS5      n
[        R                  " X5      nX4$ )Nr%   r   )re   indexr   rd   )rJ   	unsqueezerepeatr8   gatherr  r  r  r  r  permuterh   r  )r<   r  	positionsr   re   r  gathered_repsquery_start_repsquery_end_reps
start_repsend_repsr   start_logits
end_logitss                 r?   rP   &QuestionAwareSpanSelectionHead.forward  s    KKM	1##B'..q!S9V%@55mD11-@))&1
%%f---.>?''1a0
||M>++N;##Aq!,\\-:
''rA   )r  r  r  r  r  r  )	rR   rS   rT   rU   rV   r(   rP   rZ   r[   r\   s   @r?   r  r    s    
	\( (rA   r  c                   ^  ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\
\   S\\-  4S jj5       5       rSrU =r$ )SplinterForQuestionAnsweringi  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   r'   r(   r   r   r  splinter_qassquestion_token_idr   r;   s     r?   r(   %SplinterForQuestionAnswering.__init__  C     %f-;FC!'!9!9 	rA   NrB   ra   rC   r#   rD   start_positionsend_positionsquestion_positionsro   rE   c	                    Sn
Uc  UbB  [         R                  " [         R                  " XR                  5      R	                  5       SS9nOH[         R
                  " UR                  S5      [         R                  UR                  UR                  S9nUR                  S5      nSn
U R                  " U4UUUUS.U	D6nUS   nU R                  X5      u  pU
(       a!  UR                  S	5      UR                  S	5      pUbf  US	U-
  [         R                  " UR                  5      R                   -  -   nUS	U-
  [         R                  " UR                  5      R                   -  -   nSnUb  Ub  [#        UR                  5       5      S	:  a  UR                  S5      n[#        UR                  5       5      S	:  a  UR                  S5      nUR                  S	5      nUR%                  SU5        UR%                  SU5        ['        US
9nU" X5      nU" X5      nUU-   S-  n[)        UUUUR*                  UR,                  S9$ )aI  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
FNr%   )re   r   )rH   layoutrI   Tra   rC   r#   rD   r   ignore_indexrd   lossr'  r(  r   r   )r8   argmaxeqr/  r|   rK   rJ   rL   r6  rI   r  r   r.  squeezefinforH   minlenclamp_r   r   r   r   )r<   rB   ra   rC   r#   rD   r2  r3  r4  ro   question_positions_were_none"question_position_for_each_exampleoutputsr  r'  r(  
total_lossignored_indexloss_fct
start_lossend_losss                        r?   rP   $SplinterForQuestionAnswering.forward  s7   D (-$%$5:\\XXi)?)?@EEGR62 6;[[!&&q)MDXDXanauau62 "D!M!Mb!Q+/(--
))%'
 
 "!*#'#5#5o#Z ''3';';A'>
@R@RST@U*%'1~+=\M_M_A`AdAd*ddL#q>'9U[[IYIY=Z=^=^&^^J
&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
:H$x/14J+%!!//))
 	
rA   r/  r   r.  NNNNNNNN)rR   rS   rT   rU   r(   r   r   r8   r   rW   r   r   rY   r   rP   rZ   r[   r\   s   @r?   r+  r+    s     *..2.2,0-137156:W
<<$&W
 t+W
 t+	W

 llT)W
 ||d*W
 ))D0W
 ''$.W
 ",,t3W
 +,W
 
-	-W
  W
rA   r+  zB
    Class for outputs of Splinter as a span selection model.
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  S-  \S'   Sr\\R                     S-  \S'   Sr\\R                     S-  \S'   S	rg)
SplinterForPreTrainingOutputi7  a  
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when start and end positions are provided):
    Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
start_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-start scores (before SoftMax).
end_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-end scores (before SoftMax).
Nr;  r'  r(  r   r   r   )rR   rS   rT   rU   rV   r;  r8   rX   r   r'  r(  r   rY   r   rZ   r   rA   r?   rP  rP  7  s|     &*D%

d
")-1L%##d*1+/J!!D(/59M5**+d2926Je''(4/6rA   rP  z
    Splinter Model for the recurring span selection task as done during the pretraining. The difference to the QA task
    is that we do not have a question, but multiple question tokens that replace the occurrences of recurring spans
    instead.
    c                     ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\
\   S\\-  4S jj5       5       rS\R                  S\R                  4S jrSrU =r$ )SplinterForPreTrainingiN  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   r-  r;   s     r?   r(   SplinterForPreTraining.__init__V  r1  rA   NrB   ra   rC   r#   rD   r2  r3  r4  ro   rE   c	                    Uc  Ub  Ub  [        S5      eUc  Uc  [        S5      eUc  U R                  U5      nU R                  " U4UUUUS.U	D6n
U
S   nUR                  5       u  pnU R	                  X5      u  nnUR                  S5      nUb  UR                  S5      R                  UUU5      nUSU-
  [        R                  " UR                  5      R                  -  -   nUSU-
  [        R                  " UR                  5      R                  -  -   nSnUb  Ub  UR                  S[        SUS-
  5      5        UR                  S[        SUS-
  5      5        [        U R                  R                  S9nU" UR!                  UU-  U5      UR!                  UU-  5      5      nU" UR!                  UU-  U5      UR!                  UU-  5      5      nUU-   S-  n[#        UUUU
R$                  U
R&                  S	9$ )
a
  
input_ids (`torch.LongTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
start_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
NzCquestion_positions must be specified in order to calculate the lossz?question_positions must be specified when inputs_embeds is usedr7  r   r   r8  rd   r:  )	TypeError_prepare_question_positionsr   rJ   r.  r  r:   r8   r?  rH   r@  rB  maxr   r=   r,   r   rP  r   r   )r<   rB   ra   rC   r#   rD   r2  r3  r4  ro   rE  r  r   sequence_lengthre   r'  r(  num_questions attention_mask_for_each_questionrF  rH  rI  rJ  s                          r?   rP   SplinterForPreTraining.forward`  s=   j %/*E-Jcabb'I,=]^^'!%!A!A)!L--
))%'
 
 "!*+:+?+?+A(
S#'#5#5o#Z j*//2%/=/G/G/J/Q/QM?0, (1/O+OSXS^S^_k_q_qSrSvSv*vvL#q+K'Ku{{[e[k[kOlOpOp&ppJ
&=+D""1c!_q-@&AB  C?Q+>$?@ (T[[5M5MNH!!!*}"<oN$$Z-%?@J  
] :OL"":#=>H %x/14J+%!!//))
 	
rA   c                 6   [         R                  " XR                  R                  :H  5      u  p#[         R                  " U5      n[         R
                  " UR                  S5      UR                  5       4U R                  R                  [         R                  UR                  S9n[        UR                  S5      UR                  S5      :H  S5        [         R                  " U Vs/ s H  n[         R                  " U5      PM     sn5      nX5X'4'   U$ s  snf )Nr   rG   z?All samples in the batch must have at least one question token.)r8   wherer=   r/  bincountfullrJ   rX  r,   rL   rI   r   catr9   )r<   rB   rowsflat_positionsrZ  r!  ncolss           r?   rW  2SplinterForPreTraining._prepare_question_positions  s    ${{98U8U+UVt,JJ^^A 1 1 34KK$$**##	
	 	q!Y^^A%66M	
 yy=A=a%,,q/=AB .$* Bs   ( DrL  rM  )rR   rS   rT   rU   r(   r   r   r8   r   rW   r   r   rY   rP  rP   rW  rZ   r[   r\   s   @r?   rR  rR  N  s'     *..2.2,0-137156:n
<<$&n
 t+n
 t+	n

 llT)n
 ||d*n
 ))D0n
 ''$.n
 ",,t3n
 +,n
 
-	-n
  n
`U\\ ell  rA   rR  )r+  rR  r   r   r   )r   )@rV   collections.abcr   dataclassesr   r8   r   torch.nnr    r   r   activationsr	   masking_utilsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   r   utils.genericr   utils.output_capturingr   configuration_splinterr   
get_loggerrR   loggerModuler   r   floatrr   rt   r   r   r   r   r   r   r   r   r  r  r+  rP  rR  __all__r   rA   r?   <module>r{     s    $ !   % & ! 6 9 Z Z F & 6 j j 7 5 2 
		H	%/ /t %II%<<% 
% <<	%
 LL4'% % %.3)BII 3)n 		 .299  RYY . D
bii 
2 io i i Z
+ Z
 Z
z")) $#(RYY #(L d
#: d
 d
N 
 7; 7 7" L4 LL^rA   