
    3j~                        S SK Jr  S SKJr  S SKJr  S SKrS SKJr  SSKJ	r	  SSK
JrJr  SS	KJrJr  SS
KJr  SSKJr  SSKJrJr  SSKJrJr  SSKJrJr  SSKJr  SSKJ r J!r!J"r"J#r#  SSK$J%r%J&r&  SSK'J(r(  SSK)J*r*  SSK+J,r,J-r-   " S S\R\                  5      r/ " S S\R\                  5      r0S r1\" S5      SBS j5       r2S\Rf                  S\4S \Rf                  4S! jr5 SCS"\R\                  S#\Rf                  S$\Rf                  S%\Rf                  S&\Rf                  S-  S'\6S(\6S)\\!   4S* jjr7\" \25       " S+ S,\R\                  5      5       r8 " S- S.\R\                  5      r9 " S/ S0\R\                  5      r: " S1 S2\5      r;\" " S3 S4\5      5       r<\"" S5S69\ " S7 S8\5      5       5       r=\"" S9S69 " S: S;\<5      5       r>\ " S< S=\ 5      5       r?\"" S>S69 " S? S@\<\5      5       r@/ SAQrAg)D    )Callable)	dataclass)OptionalN)nn   )ACT2FN)CompileConfigGenerationMixin)use_kernel_func_from_hubuse_kernelized_func)create_bidirectional_mask)GradientCheckpointingLayer)BaseModelOutputWithPoolingCausalLMOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tuple)maybe_autocastmerge_with_config_defaults)capture_outputs   )	AutoModel   )LasrCTCConfigLasrEncoderConfigc                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )LasrEncoderSubsampling,   configc                 &  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR
                  UR                  UR                  S9U l
        [        R                  " UR
                  UR                  UR                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l        [        R                  " 5       U l        g )N)kernel_sizestride)super__init__r   Linearnum_mel_binshidden_sizedense_0Conv1dsubsampling_conv_kernel_sizesubsampling_conv_strideconv_0subsampling_conv_channelsconv_1dense_1ReLUact_fnselfr%   	__class__s     `/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/lasr/modeling_lasr.pyr*   LasrEncoderSubsampling.__init__-   s    yy!4!4f6H6HIii;;11	
 ii,,;;11	
 yy!A!A6CUCUVggi    input_featuresreturnc                 ,   U R                  U R                  U5      5      nUR                  SS5      nU R                  U R                  U5      5      nU R                  U R	                  U5      5      nUR                  SS5      nU R                  U5      $ )Nr   r   )r7   r.   	transposer2   r4   r5   )r9   r>   hidden_statess      r;   forwardLasrEncoderSubsampling.forward?   sz    DLL$@A%//15DKK$>?DKK$>?%//15||M**r=   )r7   r2   r4   r.   r5   )__name__
__module____qualname____firstlineno__r!   r*   torchTensorrC   __static_attributes____classcell__r:   s   @r;   r#   r#   ,   s0     0  $+ell +u|| + +r=   r#   c                      ^  \ rS rSr% \R
                  \S'   SS\4U 4S jjjr\	   SS\S-  S\
S   S\S-  S	\S
\4   4S jj5       r\R                  " 5       \S 5       5       rSrU =r$ )LasrEncoderRotaryEmbeddingH   inv_freqNr%   c                   > [         TU ]  5         UR                  U l        UR                  U l        Xl        U R
                  R                  S   U l        U R                  nU R                  S:w  a  [        U R                     nU" U R
                  U5      u  o@l
        U R                  SUSS9  U R                  SUR                  5       SS9  g )N	rope_typedefaultrQ   F)
persistentoriginal_inv_freq)r)   r*   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenr%   rope_parametersrS   compute_default_rope_parametersr   attention_scalingregister_bufferclone)r9   r%   devicerope_init_fnrQ   r:   s        r;   r*   #LasrEncoderRotaryEmbedding.__init__K   s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L((ZeD0(..2BuUr=   r_   ztorch.deviceseq_lenr?   ztorch.Tensorc           	         U R                   S   n[        U SS5      =(       d    U R                  U R                  -  nSnSU[        R
                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nXe4$ )	aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetahead_dimNg      ?r   r   dtype)r_   rg   )	rZ   getattrr-   num_attention_headsrI   arangeint64tofloat)r%   r_   rb   basedimattention_factorrQ   s          r;   r[   :LasrEncoderRotaryEmbedding.compute_default_rope_parameters[   s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 ))r=   c                 L   U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       U R                  -  nUR                  5       U R                  -  n	S S S 5        WR	                  UR                   S
9W	R	                  UR                   S
94$ ! , (       d  f       N@= f)Nr   r   mpscpuF)device_typeenabledr   ro   rf   )rQ   rm   expandshaperl   r_   
isinstancetypestrr   rA   rI   catcosr\   sinrg   )
r9   xposition_idsinv_freq_expandedposition_ids_expandedrv   freqsembr   r   s
             r;   rC   "LasrEncoderRotaryEmbedding.forwardy   sN    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')d444C'')d444C	 D vvAGGv$cff177f&;;; DCs   BF
F#)r\   r%   rX   rY   rS   N)NNN)rE   rF   rG   rH   rI   rJ   __annotations__r!   r*   staticmethodr   inttuplerm   r[   no_gradr   rC   rK   rL   rM   s   @r;   rO   rO   H   s    llV0 V V  +/+/"*!D(*(* t* 
~u$	%	* *: ]]_<  <r=   rO   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..Nrs   r   rx   )rz   rI   r~   )r   x1x2s      r;   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r=   rotary_pos_embc                     UR                  U5      nUR                  U5      nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXV4$ )aI  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)	unsqueezer   )qkr   r   unsqueeze_dimq_embedk_embeds          r;   apply_rotary_pos_embr      sS    & --
&C
--
&Cw;q>C/0Gw;q>C/0Gr=   rB   n_repr?   c                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r   N)rz   ry   reshape)rB   r   batchnum_key_value_headsslenre   s         r;   	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTr=   modulequerykeyvalueattention_maskscalingdropoutkwargsc                    [        X R                  5      n[        X0R                  5      n	[        R                  " XR	                  SS5      5      U-  n
Ub  X-   n
[
        R                  R                  U
S[        R                  S9R                  UR                  5      n
[
        R                  R                  XU R                  S9n
[        R                  " X5      nUR	                  SS5      R                  5       nX4$ )Nr   r   rs   ro   rg   ptrainingr   )r   num_key_value_groupsrI   matmulrA   r   
functionalsoftmaxfloat32rl   rg   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightsattn_outputs               r;   eager_attention_forwardr      s     3 ; ;<JU$?$?@L<<';';Aq'ABWLL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|:K''1-88:K$$r=   c                      ^  \ rS rSrSrS\S\4U 4S jjr  SS\R                  S\
\R                  \R                  4   S-  S	\R                  S-  S
\\   S\
\R                  \R                  4   4
S jjrSrU =r$ )LasrEncoderAttention   z=Multi-headed attention from 'Attention Is All You Need' paperr%   	layer_idxc                 P  > [         TU ]  5         Xl        X l        [	        USUR
                  UR                  -  5      U l        UR                  UR                  -  U l	        U R                  S-  U l
        UR                  U l        SU l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g )Nre   g      Fbias)r)   r*   r%   r   rh   r-   ri   re   r   r   r   attention_dropout	is_causalr   r+   attention_biasq_projk_projv_projo_projr9   r%   r   r:   s      r;   r*   LasrEncoderAttention.__init__   sI   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r=   NrB   position_embeddingsr   r   r?   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	Uu  p[        XxX5      u  px[        R                  " U R                  R                  [        5      nU" U UUU	U4U R                  (       d  SOU R                  U R                  S.UD6u  pUR                   " / UQSP76 R#                  5       nU R%                  U5      nX4$ )Nrs   r   r           )r   r   )rz   re   r   viewrA   r   r   r   r   get_interfacer%   _attn_implementationr   r   r   r   r   r   r   )r9   rB   r   r   r   input_shapehidden_shapequery_statesr   r   r   r   attention_interfacer   r   s                  r;   rC   LasrEncoderAttention.forward   s\    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ (?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r=   )r   r%   re   r   r   r   r   r   r   r   r   NN)rE   rF   rG   rH   __doc__r!   r   r*   rI   rJ   r   r   r   rC   rK   rL   rM   s   @r;   r   r      s    G
0 
S 
4 IM.2	")||") #5<<#=>E") t+	")
 +,") 
u||U\\)	*") ")r=   r   c                   >   ^  \ rS rSrSS\4U 4S jjjrSS jrSrU =r$ )LasrEncoderConvolutionModulei  r%   c           
      F  > [         TU ]  5         UR                  nUc&  UR                  n[        [        USS5         U l        O#US   n[        UR                  SS5         U l        SU l        [        R                  " USU-  SSS	UR                  S
9U l        [        R                  " UUUSU R                  UUR                  S9U l        [        R                  " UR                  UR                  S9U l        [        R                  " X3SSS	UR                  S
9U l        g)z
Args:
    config (LasrEncoderConfig): Configuration for the model.
    module_config (dict): Configuration for the module (e.g., encoder or decoder).
N
hidden_actsilur'   
activationsamer   r   r   )r'   r(   paddingr   )r(   r   groupsr   )momentum)r)   r*   r-   conv_kernel_sizer   rh   r   getr   r   r/   convolution_biaspointwise_conv1depthwise_convBatchNorm1dbatch_norm_momentumnormpointwise_conv2)r9   r%   module_configchannelsr'   r:   s        r;   r*   %LasrEncoderConvolutionModule.__init__  s    	%%  11K$WV\6%JKDO'6K$]%6%6|V%LMDO!yya(l!QVMdMd 
 !iiLL((
 NN6#5#5@Z@Z[	!yyAaI`I` 
r=   c                    UR                  SS5      nU R                  U5      n[        R                  R	                  USS9nUb`  UR
                  [        R                  :X  a  [        R                  " U) SS9nO[        R                  " US:H  ) SS9nUR                  US5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  SS5      $ )a!  
Compute convolution module.

Args:
    hidden_states (`torch.Tensor` of shape `(batch, time, channels)`): Input tensor.
    attention_mask (`torch.Tensor` of shape `(batch, 1, time, time)`): Attention mask.

Returns:
    `torch.Tensor`: Output tensor of shape `(batch, time, channels)`.

r   r   rx   r   )rA   r   r   r   glurg   rI   boolallmasked_fillr   r   r   r   )r9   rB   r   all_masked_rowss       r;   rC   $LasrEncoderConvolutionModule.forward2  s     &//15 ,,];))-Q)? %##uzz1"'))^O"C"'))n.C,D!"L)55osKM ++M:		-06,,];&&q!,,r=   )r   r   r   r   r   r   r   	rE   rF   rG   rH   r!   r*   rC   rK   rL   rM   s   @r;   r   r     s      
0  
  
D"- "-r=   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )LasrEncoderFeedForwardiW  r%   c                 X  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        UR                     U l
        [        R                  " UR
                  UR                  UR                  S9U l        UR                  U l        g )Nr   )r)   r*   r   r+   r-   intermediate_sizer   linear1r   r   r   linear2activation_dropoutr8   s     r;   r*   LasrEncoderFeedForward.__init__X  s|    yy!3!3V5M5MTZTiTij !2!23yy!9!96;M;MTZTiTij"(";";r=   c                     U R                  U R                  U5      5      n[        R                  R	                  XR
                  U R                  S9nU R                  U5      nU$ )Nr   )r   r   r   r   r   r   r   r   )r9   rB   s     r;   rC   LasrEncoderFeedForward.forward_  sS    ](CD--m?V?Vaeanan-o]3r=   )r   r   r   r   r   rM   s   @r;   r   r   W  s    <0 < r=   r   c                      ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\R                  S-  S\R                  S-  S	\	\
   S
\R                  4
S jjrSrU =r$ )LasrEncoderBlockif  r%   r   c                   > [         TU ]  5         SU l        [        U5      U l        [        X5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        UR&                  U l        UR(                  U l        g )NFr   )r)   r*   gradient_checkpointingr   feed_forward1r   	self_attnr   convfeed_forward2r   	LayerNormr-   layer_norm_epsnorm_feed_forward1norm_self_att	norm_convnorm_feed_forward2norm_outfeed_forward_residual_weightsconv_residual_weightsr   s      r;   r*   LasrEncoderBlock.__init__g  s   &+#3F;-f@08	3F;"$,,v/A/A6CXCX_d"e\\&*<*<f>S>SZ_`f&8&8&:O:OV[\"$,,v/A/A6CXCX_d"eV%7%79N9NUZ[-3-Q-Q*%+%A%A"r=   NrB   r   r   r   r?   c                 &   UnU R                  U R                  U5      5      nU R                  S   U-  U R                  S   U-  -   nU R                  U5      nU R                  " SUUUS.UD6u  pxX-   nU R                  U R                  U5      US9n	U R                  S   U-  U R                  S   U	-  -   nUnU R                  U R                  U5      5      nU R                  S   U-  U R                  S   U-  -   nU R                  U5      nU$ )Nr   r   )rB   r   r   )r    )r  r	  r  r
  r  r  r  r  r  r  r  )
r9   rB   r   r   r   residualnormalized_hidden_statesr   _conv_outputs
             r;   rC   LasrEncoderBlock.forwardy  sD    !**4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 $(#5#5m#D  
2) 3
 	
 &3ii} =ni]2215EHbHbcdHehsHss **4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 m4r=   )r  r  r  r  r  r  r  r	  r  r  r
  r  r   )rE   rF   rG   rH   r!   r   r*   rI   rJ   r   r   rC   rK   rL   rM   s   @r;   r   r   f  s|    B0 BS B* /337	!||! t+! #\\D0	!
 +,! 
! !r=   r   c                      ^  \ rS rSr% \\S'   SrSrSrSr	S/r
SrSrSrSrSrSr\\S	.r\R*                  " 5       U 4S
 j5       rS\R.                  4S jrSS\R.                  S\S-  4S jjrSrU =r$ )LasrPreTrainedModeli  r%   modelr>   audioTr   F)rB   
attentionsc                 $   > [         TU ]  U5        g r   )r)   _init_weights)r9   r   r:   s     r;   r  !LasrPreTrainedModel._init_weights  s    f%r=   input_lengthsc                     [        U R                  [        5      (       a  U R                  R                  OU R                  nUR                  nUR
                  nSn[        U5       H  nX-
  U-  S-   nM     U$ )Nr   r   )r{   r%   r    encoder_configr0   r1   range)r9   r   r"  r'   r(   
num_layersr  s          r;   _get_subsampling_output_length2LasrPreTrainedModel._get_subsampling_output_length  sn    7A$++}7]7]33cgcncn$AA77
z"A*8VCaGM # r=   Nr   target_lengthc                     U R                  UR                  S5      5      nUb  UOUR                  5       n[        R                  " XAR
                  S9USS2S4   :  nU$ )z
Convert the input attention mask to its subsampled form. `target_length` sets the desired output length, useful
when the attention mask length differs from `sum(-1).max()` (i.e., when the longest sequence in the batch is padded)
rs   Nr_   )r%  summaxrI   rj   r_   )r9   r   r'  output_lengths
max_lengths        r;   _get_output_attention_mask.LasrPreTrainedModel._get_output_attention_mask  sa    
 <<^=O=OPR=ST&3&?]^EWEWEY
j9N9NOR`abdhahRiir=   r  r   )rE   rF   rG   rH   r    r   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_flat_attention_mask_supports_sdpa_supports_flex_attn_supports_flash_attn_can_compile_fullgraph_supports_attention_backendr   r   _can_record_outputsrI   r   r  rJ   r%  r   r.  rK   rL   rM   s   @r;   r  r    s    &O&*#+,$(!N !!"&)*
 ]]_& &	ELL 		 	VY\`V` 	 	r=   r  z
    Extends [~modeling_outputs.BaseModelOutputWithPooling] to include the output attention mask since sequence length
    is not preserved in the model's forward.
    )custom_introc                   B    \ rS rSr% SrSr\R                  S-  \S'   Sr	g)LasrEncoderModelOutputi  a~  
attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
    Mask to avoid performing attention on padding token indices after sequence compression. Returned because the
    sequence length may differ from the input sequence length. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.
Nr   r  )
rE   rF   rG   rH   r   r   rI   rJ   r   rK   r  r=   r;   r>  r>    s     +/NELL4'.r=   r>  zh
    The LasrEncoder model, based on the Conformer architecture](https://arxiv.org/abs/2005.08100).
    c                      ^  \ rS rSr% \\S'   SrS\4U 4S jjr\\	\
\  SS\R                  S\R                  S-  S\S-  S	\\   S
\4
S jj5       5       5       5       rSrU =r$ )LasrEncoderi  r%   encoderc           	        > [         TU ]  U5        SU l        UR                  U l        UR                  U l        UR
                  U l        [        U5      U l        [        U5      U l	        [        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                   " UR"                  UR$                  SS9U l        U R)                  5         g s  snf )NF)epsr   )r)   r*   r  r   dropout_positions	layerdropr#   
subsamplerrO   
rotary_embr   
ModuleListr#  num_hidden_layersr   layersr  r-   r  out_norm	post_initr   s      r;   r*   LasrEncoder.__init__  s     &+#~~!'!9!9))084V<mmBGH`H`BabBaYf0Bab
 V%7%7V=R=RY^_	 cs   C3Nr>   r   output_attention_maskr   r?   c                    U R                  U5      nU R                  U[        R                  " UR                  S   UR
                  S9R                  S5      5      u  pg[        R                  R                  XPR                  U R                  S9n[        R                  R                  X`R                  U R                  S9n[        R                  R                  XpR                  U R                  S9nSnUb  U R                  X%R                  S   S9nUn[        U R                  UUS9nU R                   HS  n	Sn
U R                  (       a'  [        R                   " / 5      nXR"                  :  a  S	n
U
(       a  MF  U	" U4UXg4S
.UD6nMU     U R%                  U5      n['        UU(       a  Ub  UR)                  5       S9$ SS9$ )a  
output_attention_mask (`bool`, *optional*):
    Whether to return the output attention mask.

Example:

```python
>>> from transformers import AutoProcessor, LasrEncoder
>>> from datasets import load_dataset, Audio

>>> model_id = "google/medasr"
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> encoder = ParakeetEncoder.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"])
>>> encoder_outputs = encoder(**inputs)

>>> print(encoder_outputs.last_hidden_state.shape)
```
r   r)  r   r   Nr'  )r%   inputs_embedsr   FT)r   r   )last_hidden_stater   )rF  rG  rI   rj   rz   r_   r   r   r   r   r   rD  r.  r   r%   rJ  randrE  rK  r>  r   )r9   r>   r   rN  r   rB   r   r   output_maskencoder_layerto_dropdropout_probabilitys               r;   rC   LasrEncoder.forward  s   F 7??5<<(;(;A(>}G[G[\ffghi
 --m||VZVcVc-dmm##C+A+ADMM#Zmm##C+A+ADMM#Z%99.XkXklmXn9oK(N2;;')
 "[[MG}}&+jjn#&7"G7 -!!#1),
! 	! )  m4%+0E+Ja;??,
 	
gk
 	
r=   )r   rD  r  rE  rJ  rK  rG  rF  r   )rE   rF   rG   rH   r!   r   r0  r*   r   r   r   r   rI   rJ   r   r   r   r>  rC   rK   rL   rM   s   @r;   r@  r@    s     !0 "  /3-1	H
H
 t+H
  $d{	H

 +,H
 
 H
     H
r=   r@  c                       \ rS rSr% Sr\R                  \S'   Sr\	\R                     S-  \S'   Sr\	\	\R                        S-  \S'   Sr\	\	\R                        S-  \S'   Srg)	LasrCTCGenerateOutputiL  a6  
Outputs of Lasr CTC model generation.

Args:
    sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
        if all batches finished early due to the `eos_token_id`.
    logits (`tuple(torch.FloatTensor)` *optional*, returned when `output_logits=True`):
        Unprocessed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
        at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
        each generated token), with each tensor of shape `(batch_size, config.vocab_size)`.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True`):
        Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
        `torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
    hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True`):
        Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
        `torch.FloatTensor` of shape `(batch_size, generated_length, hidden_size)`.
	sequencesNlogitsr  rB   r  )rE   rF   rG   rH   r   rI   
LongTensorr   r\  r   FloatTensorr  rB   rK   r  r=   r;   rZ  rZ  L  sm    & .2FE%##$t+29=JeE--./$6=<@M5u0012T9@r=   rZ  zO
    Lasr Encoder with a Connectionist Temporal Classification (CTC) head.
    c                   l  ^  \ rS rSr% \\S'   S\4U 4S jjr\\  SS\	R                  S\	R                  S-  S\	R                  S-  S\\   S	\4
S
 jj5       5       r\	R                  " 5          SS\	R                  S\	R                  S-  S\S\S-  S\\   S	\\	R&                  -  4S jj5       rSrU =r$ )
LasrForCTCig  r%   c                    > [         TU ]  U5        [        R                  " UR                  5      U l        [        R                  " UR                  R                  UR                  SS9U l
        U R                  5         g )Nr   )r'   )r)   r*   r   from_configr"  rA  r   r/   r-   
vocab_sizectc_headrL  r8   s     r;   r*   LasrForCTC.__init__o  sY      ,,V-B-BC		&"7"7"C"CVEVEVdefr=   Nr>   r   labelsr   r?   c                    Ub  UR                  SS5        U R                  " SUUS.UD6nUR                  nU R                  UR	                  SS5      5      R	                  SS5      nSnUGb!  UR
                  R                  S5      n	X0R                  R                  :g  n
U
R                  S5      nUR                  U
5      n[        R                  R                  US[        R                  S9R	                  S	S5      n[        R                  R                   R#                  S
S9   [        R                  R%                  UUU	UU R                  R                  U R                  R&                  U R                  R(                  S9nSSS5        [+        UUUR,                  UR.                  S9$ ! , (       d  f       N.= f)aJ  
Example:

```python
>>> from transformers import AutoProcessor, LasrForCTC
>>> from datasets import load_dataset, Audio

>>> model_id = "nvidia/lasr-ctc-1.1b"
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> model = LasrForCTC.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"], text=ds[0]["text"])
>>> outputs = model(**inputs)

>>> print(outputs.loss)
```NrN  Tr>   r   r   r   rs   r   r   F)rw   )blank	reductionzero_infinity)lossr\  rB   r  r  )
setdefaultrA  rR  rd  rA   r   r*  r%   pad_token_idmasked_selectr   r   log_softmaxrI   r   backendscudnnflagsctc_lossctc_loss_reductionctc_zero_infinityr   rB   r  )r9   r>   r   rf  r   encoder_outputsrB   r\  rl  encoder_lengthslabels_masktarget_lengthsflattened_targets	log_probss                 r;   rC   LasrForCTC.forwardw  s   : 5t<,, 
))
 
 (99}66q!<=GG1M-<<@@DO !KK$<$<<K(__R0N & 4 4[ A 11&b1V``abdefI%%++E+:}}--%#"++22"kk<<"&++"?"? .  ; )77&11	
 	
 ;:s   'A F//
F=return_dict_in_generatecompile_configc                 j   Ub  U R                  U5      OU R                  nSUS'   U" S	UUS.UD6nUR                  R                  SS9nUb5  U R	                  X(R
                  S   S9nU R                  R                  X) '   U(       a*  [        UUR                  UR                  UR                  S9$ U$ )
a  
Example:

```python
>>> from transformers import AutoProcessor, LasrForCTC
>>> from datasets import load_dataset, Audio

>>> model_id = "google/medasr"
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> model = LasrForCTC.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"], text=ds[0]["text"])
>>> predicted_ids = model.generate(**inputs)
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

>>> print(transcription)
```
Treturn_dictrh  rs   rx   r   rP  )r[  r\  r  rB   r  )get_compiled_call__call__r\  argmaxr.  rz   r%   rn  rZ  r  rB   )	r9   r>   r   r~  r  r   model_forwardoutputsr[  s	            r;   generateLasrForCTC.generate  s    < CQB\..~>bfbobo $}"/ #
))#
 #
 NN))b)1	 %!<<^[j[jkl[m<nN)-)A)AIo&"(#~~"--%33	  r=   )rd  rA  r   )NFN)rE   rF   rG   rH   r    r   r*   r   r   rI   rJ   r   r   r   rC   r   r   r	   rZ  r]  r  rK   rL   rM   s   @r;   r`  r`  g  s    }   /3&*	B
B
 t+B
 t#	B

 +,B
 
B
  B
H ]]_ /3(-/366 t+6 "&	6
 &,6 +,6 
!1!1	16 6r=   r`  )r`  r@  r  )r   )r   )Bcollections.abcr   dataclassesr   typingr   rI   r   activationsr   
generationr	   r
   integrationsr   r   masking_utilsr   modeling_layersr   modeling_outputsr   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   r   utils.output_capturingr   autor   configuration_lasrr    r!   Moduler#   rO   r   r   rJ   r   r   rm   r   r   r   r   r   r  r>  r@  rZ  r`  __all__r  r=   r;   <module>r     s<  * % !    ! 8 I 6 9 J K F & V V G 5  @+RYY +8>< ><B( *+ ,2	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 LL4'% % % '(%2 )*<)299 <) +<)~E-299 E-PRYY 41 4n ./ . .b  
/7 
/ 
/ 
a
% a

a
H AK A A4 
H$o H
HV ?r=   