
    3j.                        S r SSKrSSKJr  SSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJr  S
SKJrJrJrJrJrJrJr  SSKJr  Sr " S S\R8                  5      r " S S\5      r " S S\5      r " S S\R8                  5      r  " S S\5      r! " S S\5      r"\ " S S\5      5       r# " S S\\#5      r$ " S S\5      r% " S  S!\5      r&/ S"Qr'g)#zPyTorch Hubert model.    N   )initialization)ACT2FN)is_deepspeed_zero3_enabled)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2EncoderWav2Vec2EncoderStableLayerNormWav2Vec2FeatureEncoderWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ModelWav2Vec2SamePadLayer   )HubertConfigc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertPositionalConvEmbedding(   c                 4  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  S9U l        S U l        [        USS5      (       a'  [        R                  " UR                  5      U l        GO[        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR"                  R%                  U R                  R&                  SS9   U" U R                  SSS	9U l        S S S 5        [        U R                  S
5      (       aU  U R                  R                  R&                  R(                  nU R                  R                  R&                  R*                  nO,U R                  R,                  nU R                  R.                  nUR"                  R1                  X5        UR"                  R1                  X5        OU" U R                  SSS	9U l        [3        UR
                  5      U l        [6        UR8                     U l        g ! , (       d  f       GN,= f)Nr
   )kernel_sizepaddinggroupsconv_pos_batch_normFweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normgetattrBatchNorm1dutilsr   hasattrr"   r   	deepspeedzeroGatheredParametersr   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r0   r5   r6   	__class__s         c/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/hubert/modular_hubert.pyr$   &HubertPositionalConvEmbedding.__init__)   s   II6622a777
	 60%88 nnV-?-?@DO((..Krxx00-@@ hh77CC)++ ^^66tyy7G7GWX6Y +DIIH! LDI Z499&899#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@ ZYs   J
Jc                     UR                  SS5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nUR                  SS5      nU$ )Nr   r
   )	transposer+   r*   r   r:   r;   hidden_statess     r>   forward%HubertPositionalConvEmbedding.forwardN   sn    %//15??& OOM:M		-0]36%//15    )r:   r+   r*   r   __name__
__module____qualname____firstlineno__r$   rD   __static_attributes____classcell__r=   s   @r>   r   r   (   s    #AJ	 	rF   r   c                       \ rS rSrSrg)r8   Z    NrH   rI   rJ   rK   rL   rQ   rF   r>   r8   r8   Z       rF   r8   c                       \ rS rSrSrg)HubertFeatureEncoder^   rQ   NrR   rQ   rF   r>   rU   rU   ^   rS   rF   rU   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeatureProjectionb   c                 x  > [         TU ]  5         UR                  U l        U R                  (       a1  [        R                  " UR
                  S   UR                  S9U l        [        R                  " UR
                  S   UR                  5      U l
        [        R                  " UR                  5      U l        g )N)eps)r#   r$   feat_proj_layer_normr%   	LayerNormconv_dimlayer_norm_eps
layer_normLinearr'   
projectionDropoutfeat_proj_dropoutdropoutr;   r<   r=   s     r>   r$    HubertFeatureProjection.__init__c   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rF   c                     U R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU$ )N)r]   ra   rc   rf   rB   s     r>   rD   HubertFeatureProjection.forwardk   s;    $$ OOM:M6]3rF   )rf   r]   ra   rc   rG   rN   s   @r>   rX   rX   b   s    < rF   rX   c                       \ rS rSrSrg)HubertEncodert   rQ   NrR   rQ   rF   r>   rl   rl   t   rS   rF   rl   c                       \ rS rSrSrg)HubertEncoderStableLayerNormx   rQ   NrR   rQ   rF   r>   ro   ro   x   rS   rF   ro   c                       \ rS rSr% \\S'   SrSrSrSS/r	Sr
SrSrSr\R                  " 5       S	 5       rS
\R"                  \-  4S jrS\S\R"                  4S jrSrg)HubertPreTrainedModel|   r<   hubertinput_valuesaudioHubertEncoderLayerParametrizedConv1dTc                    [        U[        R                  5      (       ac  [        R                  " UR
                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        gg[        U[        R                  [        R                  [        R                  45      (       a  [        R                  " UR                  5        [        R                  " UR
                  5        [        USS5      ba  [        R                  " UR                  5        [        R                  " UR                   5        [        R                  " UR"                  5        gg[        U[        R$                  5      (       Ga,  ['        5       (       a  SSKn[+        US5      (       ak  [+        US5      (       aZ  UR,                  R/                  UR0                  UR2                  /SS9   [        R4                  " UR
                  5        SSS5        OnUR,                  R/                  UR
                  SS9   [        R4                  " UR
                  5        SSS5        O [        R4                  " UR
                  5        UR                  b!  [        R                  " UR                  5        gg[        U[6        5      (       a3  [+        US	5      (       a!  [        R8                  " UR:                  5        gg[        U[<        5      (       aN  [+        US
5      (       a<  [        R>                  " UR@                  SU R                  RB                  S-   -  5        ggg! , (       d  f       N= f! , (       d  f       N= f)zInitialize the weights        )meanstdNrunning_meanr   r6   r5   r   masked_spec_embedlayer_weightsg      ?r   )"
isinstancer%   rb   initnormal_r   r<   initializer_rangebiaszeros_r^   	GroupNormr-   ones_r,   r}   running_varnum_batches_trackedr&   r   r0   r/   r1   r2   r6   r5   kaiming_normal_HubertModeluniform_r~   HubertForSequenceClassification	constant_r   num_hidden_layers)r;   moduler0   s      r>   _init_weights#HubertPreTrainedModel._init_weights   sV    fbii((LLSdkk6S6ST{{&FKK( 'r||R^^ LMMKK$JJv}}%v~t4@F//0

6--.F667 A 		**)++ 6:..76:3N3N"::FOOV__;]mn:o,,V]]; po #::6==XY:Z,,V]]; [Z $$V]]3{{&FKK( ',,v233f667 4 ?@@v//v33SDKK<Y<Y\]<]5^_ 0 A po [Zs   6!M)!M:)
M7:
Ninput_lengthsc                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )torchdiv)input_lengthr   strides      r>   _conv_out_lengthPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length   s      99\7wWZ[[[rF   )zipr<   conv_kernelconv_stride)r;   r   r   r   r   s        r>    _get_feat_extract_output_lengths6HubertPreTrainedModel._get_feat_extract_output_lengths   sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y rF   feature_vector_lengthattention_maskc                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr[   r   )dtypedevicer   )r   )r   sumtor   longshapezerosr   r   arangeflipcumsumbool)r;   r   r   output_lengths
batch_sizes        r>   "_get_feature_vector_attention_mask8HubertPreTrainedModel._get_feature_vector_attention_mask   s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrF   rQ   N)rH   rI   rJ   rK   r   __annotations__base_model_prefixmain_input_nameinput_modalities_no_split_modulessupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   no_gradr   
LongTensorintr   r   rL   rQ   rF   r>   rr   rr   |   s     $O-/CD&*#N
]]_!` !`Fe>N>NQT>T 
 
]b]m]m 
rF   rr   c                      ^  \ rS rSrS\4U 4S jjrS r     SS\R                  S-  S\R                  S-  S\R                  S-  S	\
S-  S
\
S-  S\
S-  S\\-  4S jjrSrU =r$ )r      r<   c                   > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        UR                  S:  d  UR                  S:  aG  [        R                  " [        R                  " UR                  5      R                  5       5      U l        UR                   (       a  [#        U5      U l        O['        U5      U l        U R)                  5         U ?g )Nrz   )r#   r$   r<   rU   feature_extractorrX   feature_projectionmask_time_probmask_feature_probr%   	Parameterr   Tensorr'   r   r~   do_stable_layer_normro   encoderrl   	post_initadapterrg   s     r>   r$   HubertModel.__init__   s     !5f!="9&"A  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	LrF   c                     [        S5      e)NzNot needed for Hubert)AttributeError)r;   s    r>   freeze_feature_encoder"HubertModel.freeze_feature_encoder   s    455rF   Nru   r   mask_time_indicesoutput_attentionsoutput_hidden_statesreturn_dictreturnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nUb  U R                  UR                  S   U5      nU R                  U5      n	U R                  XS9n	U R                  U	UUUUS9n
U
S   n	U(       d	  U	4U
SS -   $ [        U	U
R                  U
R                  S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.

Example:

```python
>>> from transformers import AutoProcessor, HubertModel
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```Nr   r
   )r   )r   r   r   r   r   )last_hidden_staterC   
attentions)r<   r   r   r   r   rA   r   r   r   _mask_hidden_statesr   r   rC   r   )r;   ru   r   r   r   r   r   kwargsextract_featuresrC   encoder_outputss              r>   rD   HubertModel.forward   s)   H 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY11,?+55a;%!DDEUE[E[\]E^`noN//0@A000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rF   )r<   r   r   r   r~   )NNNNN)rH   rI   rJ   rK   r   r$   r   r   r   FloatTensorr   tupler   rD   rL   rM   rN   s   @r>   r   r      s    | &6 /36:)-,0#'E
llT)E
 t+E
 !,,t3	E

  $;E
 #TkE
 D[E
 
	 E
 E
rF   r   c                       \ rS rSrSrg)HubertForCTCi'  rQ   NrR   rQ   rF   r>   r   r   '  rS   rF   r   c                       \ rS rSrSrg)r   i+  rQ   NrR   rQ   rF   r>   r   r   +  rS   rF   r   )r   r   r   rr   )(__doc__r   torch.nnr%    r   r   activationsr   integrations.deepspeedr   modeling_outputsr   modeling_utilsr   r.   r	   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   configuration_hubertr   _HIDDEN_STATES_START_POSITIONModuler   r8   rU   rX   rl   ro   rr   r   r   r   __all__rQ   rF   r>   <module>r      s       & ! @ / - #   / !" /BII /d	- 		1 	bii $	O 		#A 	 HO H HV\
-!6 \
~	> 		&G 	 frF   