
    3ji                        S SK Jr  S SKJr  S SKJr  SSKJr  SSKJ	r	  SSK
Jr  SSKJr  SS	KJrJrJrJr  SS
KJrJr  SSKJrJr  SSKJr  SSKJrJrJrJr  SSK J!r!J"r"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)J*r*  \" 5       (       a
  S SK+r+S SK+J,r,   " S S\,RZ                  5      r.S r/S\+R`                  S\1S\+R`                  4S jr2 S?S\,RZ                  S\+R`                  S\+R`                  S \+R`                  S!\+R`                  S-  S"\3S#\3S$\\   4S% jjr4S@S& jr5 " S' S(\,RZ                  5      r6 " S) S*\,RZ                  5      r7 " S+ S,\5      r8\ " S- S.\5      5       r9 " S/ S0\95      r: " S1 S2\,RZ                  5      r;\ " S3 S4\5      5       r<\" S5S69 " S7 S8\95      5       r=\" S9S69\ " S: S;\5      5       5       r>\" S5S69 " S< S=\9\5      5       r?/ S>Qr@g)A    )Callable)	dataclass)Optional   )ACT2FN)Cache)GenerationMixin)GradientCheckpointingLayer)BaseModelOutputWithPastBaseModelOutputWithPoolingCausalLMOutputWithPastModelOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringis_torch_availabletorch_compilable_check)can_return_tuplemaybe_autocastmerge_with_config_defaults)capture_outputs   )	AutoModel   )GlmAsrConfigGlmAsrEncoderConfigN)nnc                      ^  \ rS rSr% \R
                  \S'   SS\4U 4S jjjr\	   SS\S-  S\
S   S\S-  S	\S
\4   4S jj5       r\R                  " 5       \S 5       5       rSrU =r$ )GlmAsrRotaryEmbedding2   inv_freqNconfigc                   > [         TU ]  5         UR                  U l        UR                  U l        Xl        U R
                  R                  S   U l        U R                  nU R                  S:w  a  [        U R                     nU" U R
                  U5      u  o@l
        U R                  SUSS9  U R                  SUR                  5       SS9  g )N	rope_typedefaultr%   F)
persistentoriginal_inv_freq)super__init__max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenr&   rope_parametersr(   compute_default_rope_parametersr   attention_scalingregister_bufferclone)selfr&   devicerope_init_fnr%   	__class__s        d/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/glmasr/modeling_glmasr.pyr-   GlmAsrRotaryEmbedding.__init__5   s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L((ZeD0(..2BuU    r7   ztorch.deviceseq_lenreturnztorch.Tensorc           	      j   U R                   S   nU R                   R                  SS5      n[        U SS5      =(       d    U R                  U R                  -  n[        XT-  5      nSnSU[        R                  " SUS[        R                  S9R                  U[        R                  S	9U-  -  -  nX4$ )
aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetapartial_rotary_factorg      ?head_dimNr   r   dtype)r7   rD   )r1   getgetattrhidden_sizenum_attention_headsinttorcharangeint64tofloat)	r&   r7   r=   baserA   rB   dimattention_factorr%   s	            r:   r2   5GlmAsrRotaryEmbedding.compute_default_rope_parametersE   s    & %%l3 & 6 6 : :;RTW X6:t4h8J8JfNhNh8h(23 U\\!S!5;;?BB&X]XcXcBdgjjk
 ))r<   c                 L   U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       U R                  -  nUR                  5       U R                  -  n	S S S 5        WR	                  UR                   S
9W	R	                  UR                   S
94$ ! , (       d  f       N@= f)Nr   r   mpscpuF)device_typeenabledr   rP   rC   )r%   rN   expandshaperM   r7   
isinstancetypestrr   	transposerJ   catcosr3   sinrD   )
r6   xposition_idsinv_freq_expandedposition_ids_expandedrW   freqsembra   rb   s
             r:   forwardGlmAsrRotaryEmbedding.forwarde   sN    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')d444C'')d444C	 D vvAGGv$cff177f&;;; DCs   BF
F#)r3   r&   r/   r0   r(   N)NNN)__name__
__module____qualname____firstlineno__rJ   Tensor__annotations__r   r-   staticmethodr   rI   tuplerN   r2   no_gradr   ri   __static_attributes____classcell__r9   s   @r:   r#   r#   2   s    llV| V V  &*+/"*t#*(* t* 
~u$	%	* *> ]]_<  <r<   r#   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..NrT   r   rY   )r[   rJ   r`   )rc   x1x2s      r:   rotate_halfr{   u   sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r<   hidden_statesn_repr>   c                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r   N)r[   rZ   reshape)r|   r}   batchnum_key_value_headsslenrB   s         r:   	repeat_kvr   |   s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTr<   modulequerykeyvalueattention_maskscalingdropoutkwargsc                    [        X R                  5      n[        X0R                  5      n	[        R                  " XR	                  SS5      5      U-  n
Ub  X-   n
[
        R                  R                  U
S[        R                  S9R                  UR                  5      n
[
        R                  R                  XU R                  S9n
[        R                  " X5      nUR	                  SS5      R                  5       nX4$ )Nr   r   rT   )rP   rD   )ptrainingr   )r   num_key_value_groupsrJ   matmulr_   r!   
functionalsoftmaxfloat32rM   rD   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightsattn_outputs               r:   eager_attention_forwardr      s     3 ; ;<JU$?$?@L<<';';Aq'ABWLL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|:K''1-88:K$$r<   c                 R   UR                  U5      nUR                  U5      nUR                  S   nU SS U24   U SUS 24   pUSS U24   USUS 24   pXr-  [        U5      U-  -   nX-  [        U	5      U-  -   n[        R                  " X/SS9n[        R                  " X/SS9nX4$ )NrT   .rY   )	unsqueezer[   r{   rJ   r`   )qkra   rb   rd   unsqueeze_dim
rotary_dimq_rotq_passk_rotk_passq_embedk_embeds                r:   apply_rotary_pos_embr      s    
--
&C
--
&C2Jc;J;&'3
+;)<6c;J;&'3
+;)<6 {{51C78G{{51C78G ii)r2Gii)r2Gr<   c                      ^  \ rS rSrSrS\S\4U 4S jjr SS\R                  S\
\R                  \R                  4   S-  S	\\   S
\
\R                  \R                  4   4S jjrSrU =r$ )GlmAsrAttention   z=Multi-headed attention from 'Attention Is All You Need' paperr&   	layer_idxc                    > [         TU ]  5         Xl        X l        [	        USUR
                  UR                  -  5      U l        UR                  UR                  -  U l	        U R                  S-  U l
        UR                  U l        SU l        [        R                  " UR
                  UR                  U R                  -  SS9U l        [        R                  " UR
                  UR                  U R                  -  SS9U l        [        R                  " UR
                  UR                  U R                  -  SS9U l        [        R                  " UR                  U R                  -  UR
                  SS9U l        g )NrB   g      FTbias)r,   r-   r&   r   rF   rG   rH   rB   r   r   r   attention_dropout	is_causalr!   Linearq_projk_projv_projo_projr6   r&   r   r9   s      r:   r-   GlmAsrAttention.__init__   s(   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^eijr<   Nr|   position_embeddingsr   r>   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nUu  p[        XgX5      u  pg[        R                  " U R                  R                  [        5      nU" U UUU4S U R                  (       d  SOU R                  U R                  S.UD6u  pUR                   " / UQSP76 R#                  5       nU R%                  U5      nX4$ )NrT   r   r           )r   r   r   )r[   rB   r   viewr_   r   r   r   r   get_interfacer&   _attn_implementationr   r   r   r   r   r   r   )r6   r|   r   r   input_shapehidden_shapequery_statesr   r   ra   rb   attention_interfacer   r   s                 r:   ri   GlmAsrAttention.forward   s\    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ (?(M(MKK,,.E)
 %8		%

  #}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r<   )r   r&   rB   r   r   r   r   r   r   r   r   rk   )rl   rm   rn   ro   __doc__r   rI   r-   rJ   rp   rs   r   r   ri   ru   rv   rw   s   @r:   r   r      s    Gk| k k" IM!)||!) #5<<#=>E!) +,	!)
 
u||U\\)	*!) !)r<   r   c                   J   ^  \ rS rSrU 4S jrS\R                  4S jrSrU =r	$ )	GlmAsrMLP   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  5      U l        [        UR                     U l
        g rk   )r,   r-   r!   r   rG   intermediate_sizefc1fc2r   
hidden_actact_fnr6   r&   r9   s     r:   r-   GlmAsrMLP.__init__   s\    99V//1I1IJ99V55v7I7IJV../r<   r|   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rk   )r   r   r   )r6   r|   s     r:   ri   GlmAsrMLP.forward   s2    /M2/r<   )r   r   r   )
rl   rm   rn   ro   r-   rJ   rp   ri   ru   rv   rw   s   @r:   r   r      s    0U\\  r<   r   c            	          ^  \ rS rSrS\S\4U 4S jjr SS\R                  S\	\R                  \R                  4   S-  S\
\   S	\R                  4S
 jjrSrU =r$ )GlmAsrEncoderLayer   r&   r   c                   > [         TU ]  5         UR                  U l        [        XS9U l        [        U5      U l        [        R                  " UR                  5      U l	        [        R                  " UR                  5      U l
        g )N)r&   r   )r,   r-   rG   r   	self_attnr   mlpr!   	LayerNorminput_layernormpost_attention_layernormr   s      r:   r-   GlmAsrEncoderLayer.__init__   sb    !--(LV$!||F,>,>?(*V5G5G(H%r<   Nr|   r   r   r>   c                     UnU R                  U5      nU R                  " SUUS.UD6u  pXA-   nUnU R                  U5      nU R                  U5      nXA-   nU$ )N)r|   r    )r   r   r   r   )r6   r|   r   r   residual_s         r:   ri   GlmAsrEncoderLayer.forward  s|     !,,];>> 
' 3
 

 !0 !55mD/ 0r<   )rG   r   r   r   r   rk   )rl   rm   rn   ro   r   rI   r-   rJ   rp   rs   r   r   ri   ru   rv   rw   s   @r:   r   r      su    I| I I IM|| #5<<#=>E +,	
 
 r<   r   c                   D    \ rS rSr% \\S'   SrSrSrS/r	S/r
SrSrSrSrg	)
GlmAsrPreTrainedModeli  r&   model)audiotextTr   past_key_valuesr   N)rl   rm   rn   ro   r   rq   base_model_prefixinput_modalitiessupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_attention_backendru   r   r<   r:   r   r     s=    (&*#*+#4"5N"&r<   r   c                      ^  \ rS rSr% \\S'   SrSrS/r\	\
S.rS\4U 4S jjr\\\S\\   4S	 j5       5       5       rS
rU =r$ )GlmAsrEncoderi'  r&   input_featuresr   r   )r|   
attentionsc           	        > [         TU ]  U5        [        R                  " UR                  UR
                  SSS9U l        [        R                  " UR
                  UR
                  SSSS9U l        [        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR
                  5      U l        [        US9U l        SU l        U R%                  5         g s  snf )Nr   r   )kernel_sizepaddingr   )r   strider   )r&   F)r,   r-   r!   Conv1dnum_mel_binsrG   conv1conv2
ModuleListrangenum_hidden_layersr   layersr   normr#   
rotary_embgradient_checkpointing	post_initr   s      r:   r-   GlmAsrEncoder.__init__1  s     YYv22F4F4FTU_`a
YYv1163E3EST]^hij
mmDI&JbJbDcdDcy2Dcd
 LL!3!34	/v>&+# es   Dr   c                    [         R                  R                  U R                  U5      5      n[         R                  R                  U R	                  U5      5      nUR                  SS5      nUnU R                  U[        R                  " UR                  S   UR                  S9S S S 24   S9nU R                   H  nU" U4SU0UD6nM     U R                  U5      n[        US9$ )Nr   r   r7   )rd   r   )last_hidden_state)r!   r   gelur   r   r_   r   rJ   rK   r[   r7   r   r   r   )r6   r   r   inputs_embedsr|   r   encoder_layers          r:   ri   GlmAsrEncoder.forward>  s     **4::n+EF**4::m+DE%//15%"oo]5H5H5KTaThTh(ijnpqjq(r . 
 "[[M)-kM`kdjkM ) 		-0)MJJr<   )r   r   r   r   r   r   )rl   rm   rn   ro   r    rq   main_input_namer   r   r   r   _can_record_outputsr-   r   r   r   r   r   ri   ru   rv   rw   s   @r:   r   r   '  sl    &O-.+%
2   K7I0J K    Kr<   r   c                   :   ^  \ rS rSrSrS\4U 4S jjrS rSrU =r	$ )GlmAsrMultiModalProjectoriR  z
Audio adaptor (small MLP) that projects GlmAsrEncoder features
to the LLM embedding space so they can replace `<sound>` tokens.
r&   c                 n  > [         TU ]  5         [        R                  " UR                  R
                  UR                  R                  S-  5      U l        [        UR                     U l        [        R                  " UR                  R                  S-  UR                  R                  5      U l        g )Nr   )r,   r-   r!   r   audio_configr   text_configrG   linear_1r   projector_hidden_actactlinear_2r   s     r:   r-   "GlmAsrMultiModalProjector.__init__X  s    		&"5"5"G"GI[I[IgIgjkIkl&556		&"4"4"@"@1"DfFXFXFdFder<   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rk   )r  r  r  )r6   audio_featuresr|   s      r:   ri   !GlmAsrMultiModalProjector.forward^  s2    n5/m4r<   )r  r  r  )
rl   rm   rn   ro   r   r   r-   ri   ru   rv   rw   s   @r:   r  r  R  s     
f| f r<   r  c                   B    \ rS rSr% SrSr\R                  S-  \S'   Sr	g)GlmAsrModelOutputWithPastie  z[
audio_hidden_states (`torch.FloatTensor`, *optional*):
    Projected audio hidden states.
Naudio_hidden_statesr   )
rl   rm   rn   ro   r   r  rJ   FloatTensorrq   ru   r   r<   r:   r  r  e  s    
 59**T18r<   r  z~
    The GlmAsr model which consists of a fine-tuned Whisper encoder, a multi-modal projector and a Llama language model.
    custom_introc                     ^  \ rS rSrSrSrSrU 4S jr\\	" SS9S\
R                  S\
R                  S\\   S	\\-  4S
 j5       5       rS\
R$                  S\
R                  S\
R                  4S jr\\	        SS\
R$                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R$                  S-  S\S-  S\
R                  S-  S\S-  S\\   S	\\-  4S jj5       5       rSrU =r$ )GlmAsrModelio  Nc                    > [         TU ]  U5        [        R                  " UR                  5      U l        [        R                  " UR                  5      U l        [        U5      U l	        U R                  5         g rk   )r,   r-   r   from_configr  audio_towerr  language_modelr  multi_modal_projectorr   r   s     r:   r-   GlmAsrModel.__init__y  sY     $001D1DE'33F4F4FG%>v%F"r<   zgCompute audio embeddings from log-mel input features using the audio encoder and multi-modal projector.r  r   input_features_maskr   r>   c                 $   U R                   " U4SS0UD6nUR                  nUR                  UR                  S   SU R                  R
                  R                  5      nU R                  U5      nUR                  S5      nS H  u  pn
USU-  -   U	S-
  -
  S-
  U
-  S-   nM     SnX{-
  U-  S-   n[        R                  " UR                  S   UR                  S	9S
S
S
24   US
S
2S
4   :  nXmR                  UR                  5         Ul        U$ )a	  
input_features (`torch.FloatTensor`):
    Float values of mel features extracted from the raw speech waveform.
input_features_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`):
    Mask to avoid performing attention on padded feature indices.
return_dictTr   rT   ))r   r   r   )r   r   r   r   r      r  N)r"  r  r   r[   r&   r  r   r$  sumrJ   rK   r7   rM   pooler_output)r6   r   r&  r   audio_outputsr  audio_embedsaudio_lengthsr   r   r   merge_factorpost_lengths
valid_masks                 r:   get_audio_featuresGlmAsrModel.get_audio_features  s.     ((TTTVT+==199  #R)A)A)S)S
 112EF+//3,B(G&*Q[8K!OLqPU[[^__M -C%4EI\\,"4"4Q"7@S@STUY[\U\]`lmnptmt`uu
&2==ATAT3U&V#r<   	input_idsr  r  c           	      &   Ucj  X R                  5       " [        R                  " U R                  R                  [        R
                  UR                  S95      :H  nUR                  S5      nOXR                  R                  :H  nUR                  5       nUR                  S   nUR                  S5      R                  U5      R                  UR                  5      n[        X$   R                  5       UR                  5       :H  SU SU 35        U$ )z
Obtains multimodal placeholder mask from `input_ids` or `inputs_embeds`, and checks that the placeholder token count is
equal to the length of multimodal features. If the lengths are different, an error is raised.
)rD   r7   rT   r   z6Audio features and audio tokens do not match, tokens: z, features: )get_input_embeddingsrJ   tensorr&   audio_token_idlongr7   allr*  r[   r   	expand_asrM   r   numel)r6   r4  r  r  special_audio_maskn_audio_tokensn_audio_featuress          r:   get_placeholder_mask GlmAsrModel.get_placeholder_mask  s     !.2K2K2MT[[77uzzR_RfRfg3 " "4!7!7!;!*kk.H.H!H+//1)//2/99"=GGVYYZgZnZno-3359M9M9OOD^DTT`aq`rs	
 "!r<   r   rd   r   	use_cachec	           	         Uc  U R                  5       " U5      nSn
UbW  UbT  U R                  X#SS9R                  n
U R                  XU
S9nUR	                  XR                  UR                  5      5      nU R                  " SUUUUUS.U	D6n[        UR                  UR                  UR                  UR                  U
S9$ )z
input_features_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`):
    Mask to avoid performing attention on padding feature indices.
NT)r(  )r  r  )r  r   rd   r   rB  )r  r   r|   r   r  r   )r6  r2  r+  r@  masked_scatterrM   r7   r#  r  r  r   r|   r   )r6   r4  r   r&  r   rd   r   r  rB  r   r-  r=  outputss                r:   ri   GlmAsrModel.forward  s    $   557	BM%)*?22>dh2iwwL "&!:!:| "; " *889K__]j]q]qMrsM%% 
')%+
 
 )%77#33!//)) ,
 	
r<   )r"  r#  r$  )NNNNNNNN)rl   rm   rn   ro   _tp_plan_pp_plan_keep_in_fp32_modules_strictr-   r   r   rJ   r  rp   r   r   rs   r   r2  
LongTensorr@  r   boolr  ri   ru   rv   rw   s   @r:   r  r  o  s    HH#'  ~)) #\\ +,	
 
+	+ <"))":?:K:K"]b]n]n"0  .23737.204(,26!%,
##d*,
 ))D0,
 #\\D0	,

 t+,
 &&-,
 ,
 ((4/,
 $;,
 +,,
 
*	*,
  ,
r<   r  zR
    Base class for GlmAsr causal language model (or autoregressive) outputs.
    c                      \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\S-  \S'   Sr\\R                     S-  \S'   Sr\\R                     S-  \S'   Sr\R                  S-  \S	'   S
rg)GlmAsrCausalLMOutputWithPasti  a2  
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
    Language modeling loss (for next-token prediction).
logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
    Prediction scores of the language modeling head.
past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
    It is a [`~cache_utils.Cache`] instance.
audio_hidden_states (`torch.FloatTensor`, *optional*):
    Hidden states of the audio encoder after projection.
Nlosslogitsr   r|   r   r  r   )rl   rm   rn   ro   r   rN  rJ   r  rq   rO  r   r   r|   rs   r   r  ru   r   r<   r:   rM  rM    s    	 &*D%

d
")'+FE$+$(OUT\(59M5**+d2926Je''(4/648**T18r<   rM  c                     ^  \ rS rSrS/rSS0rU 4S jrS r\\	          SS\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S-  S\
R                  S-  S\S-  S\\
R                  -  S\\   S\4S jj5       5       rSS.S\4U 4S jjjrSrU =r$ )GlmAsrForConditionalGenerationi  embed_positionszlm_head.weightz(model.language_model.embed_tokens.weightc                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  R                  UR                  R                  SS9U l	        U R                  5         g )NFr   )r,   r-   r  r   r!   r   r  rG   
vocab_sizelm_headr   r   s     r:   r-   'GlmAsrForConditionalGeneration.__init__  sS      (
yy!3!3!?!?ASASA^A^ejkr<   c                 <    U R                   R                  " X40 UD6$ rk   )r   r2  )r6   r   r&  r   s       r:   r2  1GlmAsrForConditionalGeneration.get_audio_features  s    zz,,^[TZ[[r<   Nr4  r   r&  r   rd   r   r  labelsrB  logits_to_keepr   r>   c                    U R                   " SUUUUUUUU	S.UD6nUR                  n[        U
[        5      (       a  [	        U
* S5      OU
nU R                  USS2USS24   5      nSnUb3  U R                  " SXU R                  R                  R                  S.UD6n[        UUUR                  UR                  UR                  UR                  S9$ )ap  
input_features_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`):
    Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import GlmAsrForConditionalGeneration, AutoProcessor

>>> model_id = "zai-org/GLM-ASR-Nano-2512"
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> model = GlmAsrForConditionalGeneration.from_pretrained(model_id, dtype="auto", device_map="auto")
>>> inputs = processor.apply_transcription_request("https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/bcn_weather.mp3")

>>> inputs = inputs.to(model.device, dtype=model.dtype)

>>> outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)

>>> decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1] :], skip_special_tokens=True)
>>> print(decoded_outputs)
```)r4  r   r&  r   rd   r   r  rB  N)rO  rY  rT  )rN  rO  r   r|   r   r  r   )r   r  r\   rI   slicerU  loss_functionr&   r  rT  rM  r   r|   r   r  )r6   r4  r   r&  r   rd   r   r  rY  rB  rZ  r   rE  r|   slice_indicesrO  rN  s                    r:   ri   &GlmAsrForConditionalGeneration.forward  s    X ** 

) 3)%+'

 

  118B>SV8W8W~ot4]kmA}a,?@A%% 9P9P9[9[_eD ,#33!//)) ' ; ;
 	
r<   F)is_first_iterationr`  c                   > UR                  SS 5      nUR                  SS 5      n[        TU ]  " U0 UD6nU(       d  UR                  SS5      (       d  Ub  XFS'   Ub  XVS'   U$ )Nr   r&  rB  F)popr,   prepare_inputs_for_generationrE   )r6   r`  argsr   r   r&  model_inputsr9   s          r:   rc  <GlmAsrForConditionalGeneration.prepare_inputs_for_generationb  ss    $4d;$jj)>Ew<dMfM\%5%5k5%I%I)1?-.".6I23r<   )rU  r   )
NNNNNNNNNr   )rl   rm   rn   ro   rI  _tied_weights_keysr-   r2  r   r   rJ   rJ  r  rp   r   rK  rI   r   r   r   ri   rc  ru   rv   rw   s   @r:   rQ  rQ    s^    %6#6 *,VW\  .23737.204(,26*.!%-.G
##d*G
 ))D0G
 #\\D0	G

 t+G
 &&-G
 G
 ((4/G
   4'G
 $;G
 ell*G
 +,G
 
 G
  G
R OT t  r<   rQ  )r   rQ  r  r   )r   )Nr   )Acollections.abcr   dataclassesr   typingr   activationsr   cache_utilsr   
generationr	   modeling_layersr
   modeling_outputsr   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   r   r   utils.output_capturingr   autor   configuration_glmasrr   r    rJ   r!   Moduler#   r{   rp   rI   r   rN   r   r   r   r   r   r   r   r  r  r  rM  rQ  __all__r   r<   r:   <module>rz     s#  * % !  !   ) 9  L F & c c Y Y 5  C @<BII @<F(	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 LL4'% % % '(%2$2)bii 2)j		  3  F 	'O 	' 	'(K) (KV		 & 9 7 9 9 
t
' t

t
n 
 9; 9 9( 
d%:O d
dN fr<   