
    3j<                         S r SSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
  SSKJrJr  SSKJr  S	S
KJr  \R$                  " \5      r\ " S S\5      5       rS/rg)z
Processor class for Bark
    N   )BatchFeature)ProcessorMixin)BatchEncoding)auto_docstringlogging)cached_file   )AutoTokenizerc                     ^  \ rS rSrSSSS.rSU 4S jjr\ SS j5       r   SS\4U 4S	 jjjr	\
S
\S\S\4S j5       rSS\S-  4S jjrSS\S-  4S jjr\S\4S j5       rSS\4S jjr\       SS\4S jj5       rSrU =r$ )BarkProcessor"      r
   semantic_promptcoarse_promptfine_promptNc                 0   > [         TU ]  U5        X l        g)a  
speaker_embeddings (`dict[dict[str]]`, *optional*):
    Optional nested speaker embeddings dictionary. The first level contains voice preset names (e.g
    `"en_speaker_4"`). The second level contains `"semantic_prompt"`, `"coarse_prompt"` and `"fine_prompt"`
    embeddings. The values correspond to the path of the corresponding `np.ndarray`. See
    [here](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c) for
    a list of `voice_preset_names`.
N)super__init__speaker_embeddings)self	tokenizerr   	__class__s      b/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/bark/processing_bark.pyr   BarkProcessor.__init__*   s     	#"4    c                 Z   UR                  S5      nUb  [        UUUR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UUR                  S	S5      SSSS
9nUc9  [        R	                  S[
        R                  R                  X5       S35        SnO-[        U5       n[        R                  " U5      nSSS5        OSnWb
  SU;   a  XS'   [        R                  " U40 UD6nU " XS9$ ! , (       d  f       N8= f)ac  
Instantiate a Bark processor associated with a pretrained model.

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained [`BarkProcessor`] hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a processor saved using the [`~BarkProcessor.save_pretrained`]
          method, e.g., `./my_model_directory/`.
    speaker_embeddings_dict_path (`str`, *optional*, defaults to `"speaker_embeddings_path.json"`):
        The name of the `.json` file containing the speaker_embeddings dictionary located in
        `pretrained_model_name_or_path`. If `None`, no speaker_embeddings is loaded.
    **kwargs
        Additional keyword arguments passed along to both
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
tokenN	subfolder	cache_dirforce_downloadFproxieslocal_files_onlyrevision
r    r!   r"   r#   r$   r   r%    _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errors`z` does not exists
                    , no preloaded speaker embeddings will be used - Make sure to provide a correct path to the json
                    dictionary if wanted, otherwise set `speaker_embeddings_dict_path=None`.repo_or_path)r   r   )getr	   poploggerwarningospathjoinopenjsonloadr   from_pretrained)	cls!pretrained_processor_name_or_pathspeaker_embeddings_dict_pathkwargsr   speaker_embeddings_pathr   speaker_embeddings_jsonr   s	            r   r6   BarkProcessor.from_pretrained7   s8   , 

7#'3&11, **[$7 **[$7%zz*:EB

9d3!',>!FJ5166;8='# '."'',,'Hgh i] `
 &*"126M)-3J)K& 32 "&)!335V>2!112S^W]^	YNN 32s   D
D*push_to_hubc           	        > U R                   GbU  [        R                  " [        R                  R	                  XS5      SS9  0 nXS'   [        R                  R	                  X5      nU R
                   H  nU R                  U5      n	0 n
U R                   U    Hx  n[        R                  R	                  Xx SU 35      nU R                  X|U5        [        R                  " XU   SS9  [        R                  R	                  X8 SU S	35      X'   Mz     XU'   M     [        [        R                  R	                  X5      S
5       n[        R                  " Xm5        SSS5        [        TU ]8  " X40 UD6  g! , (       d  f       N = f)a  
Saves the attributes of this processor (tokenizer...) in the specified directory so that it can be reloaded
using the [`~BarkProcessor.from_pretrained`] method.

Args:
    save_directory (`str` or `os.PathLike`):
        Directory where the tokenizer files and the speaker embeddings will be saved (directory will be created
        if it does not exist).
    speaker_embeddings_dict_path (`str`, *optional*, defaults to `"speaker_embeddings_path.json"`):
        The name of the `.json` file that will contains the speaker_embeddings nested path dictionary, if it
        exists, and that will be located in `pretrained_model_name_or_path/speaker_embeddings_directory`.
    speaker_embeddings_directory (`str`, *optional*, defaults to `"speaker_embeddings/"`):
        The name of the folder in which the speaker_embeddings arrays will be saved.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
        repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
        namespace).
    kwargs:
        Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
Nv2T)exist_okr+   _F)allow_picklez.npyw)r   r0   makedirsr1   r2   available_voice_presets_load_voice_preset_reject_path_traversalnpsaver3   r4   dumpr   save_pretrained)r   save_directoryr9   speaker_embeddings_directoryr>   r:   embeddings_dictembeddings_subdir
prompt_keyvoice_presettmp_dictkeytarget_filepathfpr   s                 r   rL   BarkProcessor.save_pretrainedq   sN   8 "".KK^SWXcgh O.<N+ "^ Z"::
#66zB22:>C&(ggll3DTUVYUZF[&\O//0AT^_GGO#->US$&GGLL1MQ]]^_b^ccgOh$iHM	 ? /7
+ ; bggll>PRUVZ\		/. W 	FvF WVs   E77
Fbase_dirtarget_pathoffending_valuec                    [         R                  R                  U 5      n[         R                  R                  U5      n [         R                  R                  X4/5      U:H  nU(       d  [	        SU< 35      eg ! [         a    Sn N%f = f)NFzInvalid voice preset path: )r0   r1   abspath
commonpath
ValueError)rX   rY   rZ   basetarget	containeds         r   rH   $BarkProcessor._reject_path_traversal   s|     wwx(-	**D>:dBI :?:MNOO   	I	s    #A: :B	B	rR   c                    U R                   U   n0 nUR                  S5      nU R                   R                  SS5      nS GHG  nXs;  a  [        SU SU S35      eU R                  U[        R
                  R                  XcU   5      X7   5        [        U R                   R                  SS5      X7   UR                  SS 5      UR                  S	S 5      UR                  S
S5      UR                  SS 5      UR                  SS5      UUR                  SS 5      SSSS9nUcL  [        S[        R
                  R                  U R                   R                  SS5      X7   5       SU S35      e[        R                  " U5      XG'   GMJ     U$ )Nr   r+   /r   #Voice preset unrecognized, missing z% as a key in self.speaker_embeddings[z].r    r!   r"   Fr#   r$   r%   r&   r*   z{` does not exists
                    , no preloaded voice preset will be used - Make sure to provide correct paths to the z 
                    embeddings.)r   r,   r^   rH   r0   r1   r2   r	   r-   rI   r5   )	r   rR   r:   voice_preset_pathsvoice_preset_dictr   r+   rT   r1   s	            r   rG    BarkProcessor._load_voice_preset   s   !44\B

7#..22>3GFC, 9#>cdpcqqst  ''bggll<C9PQSeSj ''++NC@"' **[$7 **[$7%zz*:EB

9d3!',>!FJ5166;8=D | "'',,t'>'>'B'B>SV'WYkYpqr sjjviw x #  &(WWT]"= G@ ! r   c           	      l   S H  nX!;  a  [        SU S35      e[        X   [        R                  5      (       d'  [	        U S[        U R                  U   5       S35      e[        X   R                  5      U R                  U   :w  d  M  [        U S[        U R                  U   5       S35      e   g )Nr   re   z
 as a key.z voice preset must be a z
D ndarray.)	r^   
isinstancerI   ndarray	TypeErrorstrpreset_shapelenshape)r   rR   rT   s      r   _validate_voice_preset_dict)BarkProcessor._validate_voice_preset_dict   s    FC& #Fse:!VWWl/<<3%'?DDUDUVYDZ@[?\\f ghh<$**+t/@/@/EE C5(@TEVEVWZE[A\@]]g!hii Gr   returnc                     U R                   c  / $ [        U R                   R                  5       5      nSU;   a  UR                  S5        U$ )ze
Returns a list of available voice presets.

Returns:
    `list[str]`: A list of voice preset names.
r+   )r   listkeysremove)r   voice_presetss     r   rF   %BarkProcessor.available_voice_presets   sJ     ""*IT4499;<]*  0r   remove_unavailablec                 h   / nU R                   b  U R                   H&  n U R                  U5      nU R                  U5        M(     U(       a%  [        R                  S[        U5       SU S35        U(       a  U H  nU R                   U	 M     g g g ! [         a    UR	                  U5         M  f = f)NzThe following z' speaker embeddings are not available: zU If you would like to use them, please check the paths or try downloading them again.)	r   rF   rG   r^   appendrq   r.   r/   ro   )r   rz   unavailable_keysrR   rg   s        r   _verify_speaker_embeddings(BarkProcessor._verify_speaker_embeddings   s    "". $ < <(,(?(?(M%
 001BC !=  $S)9%:$;;bcsbt uk k
 "$4L//= %5 "! / " $++L9s   BB10B1c           
         Ub  [        U[        5      (       d  [        U[        5      (       a.  U R                  b!  X R                  ;   a  U R	                  U5      nOF[        U[        5      (       a  UR                  S5      (       d  US-   n[        R                  " U5      nUb  U R                  " U40 UD6  [        X#S9nU R                  " U4USUUUUS.UD6n	Ub  X)S'   U	$ )a  
voice_preset (`str`, `dict[np.ndarray]`):
    The voice preset, i.e the speaker embeddings. It can either be a valid voice_preset name, e.g
    `"en_speaker_1"`, or directly a dictionary of `np.ndarray` embeddings for each submodel of `Bark`. Or
    it can be a valid file name of a local `.npz` single voice preset containing the keys
    `"semantic_prompt"`, `"coarse_prompt"` and `"fine_prompt"`.

Returns:
    [`BatchEncoding`]: A [`BatchEncoding`] object containing the output of the `tokenizer`.
    If a voice preset is provided, the returned object will include a `"history_prompt"` key
    containing a [`BatchFeature`], i.e the voice preset with the right tensors type.
z.npz)datatensor_type
max_length)return_tensorspaddingr   return_attention_maskreturn_token_type_idsadd_special_tokenshistory_prompt)rj   dictrm   r   rG   endswithrI   r5   rq   r   r   )
r   textrR   r   r   r   r   r   r:   encoded_texts
             r   __call__BarkProcessor.__call__  s    0 #J|T,J,J<--++7 $;$;;#66|D lC009N9Nv9V9V#/&#8L!ww|4#,,\DVD'\VL~~	
) !"7"71	
 	
 #-9)*r   )r   )N)speaker_embeddings_path.json)r   r   F)T)NNpt   FTF)__name__
__module____qualname____firstlineno__rn   r   classmethodr6   boolrL   staticmethodrm   rH   rG   r   rq   propertyru   rF   r~   r   r   r   __static_attributes____classcell__)r   s   @r   r   r   "   s    L5 Mk7O 7Ox &D%9!3G
 3G 3Gj P P3 PQT P P$&!sTz &!P	jt 	j   >T >.   "#7 
7 7r   r   )__doc__r4   r0   numpyrI   feature_extraction_utilsr   processing_utilsr   tokenization_utils_baser   utilsr   r   	utils.hubr	   autor   
get_loggerr   r.   r   __all__ r   r   <module>r      sb     	  4 . 4 , $   
		H	% hN h hV	 
r   