ó
    °3j±<  ã                   ó¶   • S r SSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
  SSKJrJr  SSKJr  S	S
KJr  \R$                  " \5      r\ " S S\5      5       rS/rg)z
Processor class for Bark
é    Né   )ÚBatchFeature)ÚProcessorMixin)ÚBatchEncoding)Úauto_docstringÚlogging)Úcached_fileé   )ÚAutoTokenizerc                   ó  ^ • \ rS rSrSSSS.rSU 4S jjr\ SS j5       r   SS\4U 4S	 jjjr	\
S
\S\S\4S j5       rSS\S-  4S jjrSS\S-  4S jjr\S\4S j5       rSS\4S jjr\       SS\4S jj5       rSrU =r$ )ÚBarkProcessoré"   é   r
   ©Úsemantic_promptÚcoarse_promptÚfine_promptNc                 ó0   >• [         TU ]  U5        X l        g)aò  
speaker_embeddings (`dict[dict[str]]`, *optional*):
    Optional nested speaker embeddings dictionary. The first level contains voice preset names (e.g
    `"en_speaker_4"`). The second level contains `"semantic_prompt"`, `"coarse_prompt"` and `"fine_prompt"`
    embeddings. The values correspond to the path of the corresponding `np.ndarray`. See
    [here](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c) for
    a list of `voice_preset_names`.
N)ÚsuperÚ__init__Úspeaker_embeddings)ÚselfÚ	tokenizerr   Ú	__class__s      €Úb/home/wildlama/miniconda3/lib/python3.13/site-packages/transformers/models/bark/processing_bark.pyr   ÚBarkProcessor.__init__*   s   ø€ ô 	‰Ñ˜Ô#à"4Õó    c                 óZ  • UR                  S5      nUbÛ  [        UUUR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      UUR                  S	S5      SSSS
9nUc9  [        R	                  S[
        R                  R                  X5       S35        SnO-[        U5       n[        R                  " U5      nSSS5        OSnWb
  SU;   a  XS'   [        R                  " U40 UD6nU " X†S9$ ! , (       d  f       N8= f)ac  
Instantiate a Bark processor associated with a pretrained model.

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained [`BarkProcessor`] hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a processor saved using the [`~BarkProcessor.save_pretrained`]
          method, e.g., `./my_model_directory/`.
    speaker_embeddings_dict_path (`str`, *optional*, defaults to `"speaker_embeddings_path.json"`):
        The name of the `.json` file containing the speaker_embeddings dictionary located in
        `pretrained_model_name_or_path`. If `None`, no speaker_embeddings is loaded.
    **kwargs
        Additional keyword arguments passed along to both
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
ÚtokenNÚ	subfolderÚ	cache_dirÚforce_downloadFÚproxiesÚlocal_files_onlyÚrevision©
r    r!   r"   r#   r$   r   r%   Ú _raise_exceptions_for_gated_repoÚ%_raise_exceptions_for_missing_entriesÚ'_raise_exceptions_for_connection_errorsÚ`zã` does not exists
                    , no preloaded speaker embeddings will be used - Make sure to provide a correct path to the json
                    dictionary if wanted, otherwise set `speaker_embeddings_dict_path=None`.Úrepo_or_path)r   r   )Úgetr	   ÚpopÚloggerÚwarningÚosÚpathÚjoinÚopenÚjsonÚloadr   Úfrom_pretrained)	ÚclsÚ!pretrained_processor_name_or_pathÚspeaker_embeddings_dict_pathÚkwargsr   Úspeaker_embeddings_pathr   Úspeaker_embeddings_jsonr   s	            r   r6   ÚBarkProcessor.from_pretrained7   s8  € ð, —
‘
˜7Ó#ˆØ'Ñ3Ü&1Ø1Ø,Ø Ÿ*™* [°$Ó7Ø Ÿ*™* [°$Ó7Ø%Ÿz™zÐ*:¸EÓBØŸ
™
 9¨dÓ3Ø!'§¡Ð,>ÀÓ!FØØŸ™ J°Ó5Ø16Ø6;Ø8=ñ'Ð#ð 'Ñ.Ü—‘Øœ"Ÿ'™'Ÿ,™,Ð'HÓgÐhð i]ð `ôð
 &*Ñ"äÐ1Ô2Ð6MÜ)-¯ªÐ3JÓ)KÐ&÷ 3Ð2ð "&ÐàÑ)ØÐ!3Ó3Ø5V >Ñ2Ü!×1Ò1Ð2SÑ^ÐW]Ñ^ˆ	á˜YÑNÐN÷ 3Õ2ús   ÃDÄ
D*Úpush_to_hubc           	      ó  >• U R                   GbU  [        R                  " [        R                  R	                  XS5      SS9  0 nXS'   [        R                  R	                  X5      nU R
                   H¥  nU R                  U5      n	0 n
U R                   U    Hx  n[        R                  R	                  Xx SU 35      nU R                  X|U5        [        R                  " XÉU   SS9  [        R                  R	                  X8 SU S	35      X«'   Mz     X¦U'   M§     [        [        R                  R	                  X5      S
5       n[        R                  " Xm5        SSS5        [        TU ]8  " X40 UD6  g! , (       d  f       N = f)aì  
Saves the attributes of this processor (tokenizer...) in the specified directory so that it can be reloaded
using the [`~BarkProcessor.from_pretrained`] method.

Args:
    save_directory (`str` or `os.PathLike`):
        Directory where the tokenizer files and the speaker embeddings will be saved (directory will be created
        if it does not exist).
    speaker_embeddings_dict_path (`str`, *optional*, defaults to `"speaker_embeddings_path.json"`):
        The name of the `.json` file that will contains the speaker_embeddings nested path dictionary, if it
        exists, and that will be located in `pretrained_model_name_or_path/speaker_embeddings_directory`.
    speaker_embeddings_directory (`str`, *optional*, defaults to `"speaker_embeddings/"`):
        The name of the folder in which the speaker_embeddings arrays will be saved.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
        repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
        namespace).
    kwargs:
        Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
NÚv2T)Úexist_okr+   Ú_F)Úallow_picklez.npyÚw)r   r0   Úmakedirsr1   r2   Úavailable_voice_presetsÚ_load_voice_presetÚ_reject_path_traversalÚnpÚsaver3   r4   Údumpr   Úsave_pretrained)r   Úsave_directoryr9   Úspeaker_embeddings_directoryr>   r:   Úembeddings_dictÚembeddings_subdirÚ
prompt_keyÚvoice_presetÚtmp_dictÚkeyÚtarget_filepathÚfpr   s                 €r   rL   ÚBarkProcessor.save_pretrainedq   sN  ø€ ð8 ×"Ñ"Ò.ÜKŠKœŸ™Ÿ™ ^ÐSWÓXÐcgÒhà ˆOà.<˜NÑ+ä "§¡§¡¨^Ó ZÐØ"×:Ô:
Ø#×6Ñ6°zÓBàØ×2Ñ2°:Ô>CÜ&(§g¡g§l¡lÐ3DÈÐTUÐVYÐUZÐF[Ó&\OØ×/Ñ/Ð0AÐT^Ô_Ü—G’G˜O¸#Ñ->ÈUÒSÜ$&§G¡G§L¡LÐ1MÐQ]Ð]^Ð_bÐ^cÐcgÐOhÓ$iH“Mñ	 ?ð /7 
Ó+ñ ;ô ”b—g‘g—l‘l >ÓPÐRUÔVÐZ\Ü—	’	˜/Ô.÷ Wô 	‰Ò ÑF¸vÓF÷ WÕVús   ÅE7Å7
FÚbase_dirÚtarget_pathÚoffending_valuec                 ó  • [         R                  R                  U 5      n[         R                  R                  U5      n [         R                  R                  X4/5      U:H  nU(       d  [	        SU< 35      eg ! [         a    Sn N%f = f)NFzInvalid voice preset path: )r0   r1   ÚabspathÚ
commonpathÚ
ValueError)rX   rY   rZ   ÚbaseÚtargetÚ	containeds         r   rH   Ú$BarkProcessor._reject_path_traversal¦   s|   € ô w‰w‰˜xÓ(ˆÜ—‘—‘ Ó-ˆð	ÜŸ™×*Ñ*¨D¨>Ó:¸dÑBˆIö ÜÐ:¸?Ñ:MÐNÓOÐOð øô ó 	àŠIð	ús   Á #A: Á:B	ÂB	rR   c                 ó  • U R                   U   n0 nUR                  S5      nU R                   R                  SS5      nS GHG  nXs;  a  [        SU SU S35      eU R                  U[        R
                  R                  XcU   5      X7   5        [        U R                   R                  SS5      X7   UR                  SS 5      UR                  S	S 5      UR                  S
S5      UR                  SS 5      UR                  SS5      UUR                  SS 5      SSSS9nUcL  [        S[        R
                  R                  U R                   R                  SS5      X7   5       SU S35      e[        R                  " U5      XG'   GMJ     U$ )Nr   r+   Ú/r   ú#Voice preset unrecognized, missing z% as a key in self.speaker_embeddings[z].r    r!   r"   Fr#   r$   r%   r&   r*   z{` does not exists
                    , no preloaded voice preset will be used - Make sure to provide correct paths to the z 
                    embeddings.)r   r,   r^   rH   r0   r1   r2   r	   r-   rI   r5   )	r   rR   r:   Úvoice_preset_pathsÚvoice_preset_dictr   r+   rT   r1   s	            r   rG   Ú BarkProcessor._load_voice_preset¹   s¡  € Ø!×4Ñ4°\ÑBÐàÐØ—
‘
˜7Ó#ˆØ×.Ñ.×2Ñ2°>À3ÓGˆÜFˆCØÓ,Ü Ø9¸#¸Ð>cÐdpÐcqÐqsÐtóð ð ×'Ñ'ØœbŸg™gŸl™l¨<ÈCÑ9PÓQÐSeÑSjôô Ø×'Ñ'×+Ñ+¨N¸CÓ@Ø"Ñ'Ø Ÿ*™* [°$Ó7Ø Ÿ*™* [°$Ó7Ø%Ÿz™zÐ*:¸EÓBØŸ
™
 9¨dÓ3Ø!'§¡Ð,>ÀÓ!FØØŸ™ J°Ó5Ø16Ø6;Ø8=ñˆDð ‰|Ü Øœ"Ÿ'™'Ÿ,™, t×'>Ñ'>×'BÑ'BÀ>ÐSVÓ'WÐYkÑYpÓqÐrð sjØjvÐiwð x ð#óð ô &(§W¢W¨T£]ÐÔ"ñ= Gð@ !Ð r   c           	      ól  • S H®  nX!;  a  [        SU S35      e[        X   [        R                  5      (       d'  [	        U S[        U R                  U   5       S35      e[        X   R                  5      U R                  U   :w  d  M‰  [        U S[        U R                  U   5       S35      e   g )Nr   re   z
 as a key.z voice preset must be a z
D ndarray.)	r^   Ú
isinstancerI   ÚndarrayÚ	TypeErrorÚstrÚpreset_shapeÚlenÚshape)r   rR   rT   s      r   Ú_validate_voice_preset_dictÚ)BarkProcessor._validate_voice_preset_dictá   sµ   € ÛFˆCØÓ&Ü Ð#FÀsÀeÈ:Ð!VÓWÐWä˜lÑ/´·±×<Ñ<Ü 3 %Ð'?ÄÀD×DUÑDUÐVYÑDZÓ@[Ð?\Ð\fÐ gÓhÐhä<Ñ$×*Ñ*Ó+¨t×/@Ñ/@ÀÑ/EÕEÜ  C 5Ð(@ÄÀT×EVÑEVÐWZÑE[ÓA\Ð@]Ð]gÐ!hÓiÐiò Gr   Úreturnc                 ó˜   • U R                   c  / $ [        U R                   R                  5       5      nSU;   a  UR                  S5        U$ )ze
Returns a list of available voice presets.

Returns:
    `list[str]`: A list of voice preset names.
r+   )r   ÚlistÚkeysÚremove)r   Úvoice_presetss     r   rF   Ú%BarkProcessor.available_voice_presetsì   sJ   € ð ×"Ñ"Ñ*ØˆIä˜T×4Ñ4×9Ñ9Ó;Ó<ˆØ˜]Ó*Ø× Ñ  Ô0ØÐr   Úremove_unavailablec                 óh  • / nU R                   b  U R                   H&  n U R                  U5      nU R                  U5        M(     U(       a%  [        R                  S[        U5       SU S35        U(       a  U H  nU R                   U	 M     g g g ! [         a    UR	                  U5         M”  f = f)NzThe following z' speaker embeddings are not available: zU If you would like to use them, please check the paths or try downloading them again.)	r   rF   rG   r^   Úappendrq   r.   r/   ro   )r   rz   Úunavailable_keysrR   rg   s        r   Ú_verify_speaker_embeddingsÚ(BarkProcessor._verify_speaker_embeddingsü   sÉ   € àÐØ×"Ñ"Ñ.Ø $× <Ô <ðØ(,×(?Ñ(?ÀÓ(MÐ%ð
 ×0Ñ0Ð1BÖCñ !=ö  Ü—‘Ø$¤SÐ)9Ó%:Ð$;Ð;bÐcsÐbtð ukð kôö
 "Û$4LØ×/Ñ/°Ò=ò %5ð "ð! /øô "ó à$×+Ñ+¨LÔ9Úðús    BÂB1Â0B1c           
      óÈ  • Ubž  [        U[        5      (       d‰  [        U[        5      (       a.  U R                  b!  X R                  ;   a  U R	                  U5      nOF[        U[        5      (       a  UR                  S5      (       d  US-   n[        R                  " U5      nUb  U R                  " U40 UD6  [        X#S9nU R                  " U4USUUUUS.UD6n	Ub  X)S'   U	$ )a©  
voice_preset (`str`, `dict[np.ndarray]`):
    The voice preset, i.e the speaker embeddings. It can either be a valid voice_preset name, e.g
    `"en_speaker_1"`, or directly a dictionary of `np.ndarray` embeddings for each submodel of `Bark`. Or
    it can be a valid file name of a local `.npz` single voice preset containing the keys
    `"semantic_prompt"`, `"coarse_prompt"` and `"fine_prompt"`.

Returns:
    [`BatchEncoding`]: A [`BatchEncoding`] object containing the output of the `tokenizer`.
    If a voice preset is provided, the returned object will include a `"history_prompt"` key
    containing a [`BatchFeature`], i.e the voice preset with the right tensors type.
z.npz)ÚdataÚtensor_typeÚ
max_length)Úreturn_tensorsÚpaddingrƒ   Úreturn_attention_maskÚreturn_token_type_idsÚadd_special_tokensÚhistory_prompt)rj   Údictrm   r   rG   ÚendswithrI   r5   rq   r   r   )
r   ÚtextrR   r„   rƒ   rˆ   r†   r‡   r:   Úencoded_texts
             r   Ú__call__ÚBarkProcessor.__call__  sî   € ð0 Ñ#¬J°|ÄT×,JÑ,Jä˜<¬×-Ñ-Ø×+Ñ+Ñ7Ø ×$;Ñ$;Ó;à#×6Ñ6°|ÓD‘ô ˜l¬C×0Ñ0¸×9NÑ9NÈv×9VÑ9VØ#/°&Ñ#8Lä!Ÿwšw |Ó4àÑ#Ø×,Ò,¨\ÑD¸VÒDÜ'¨\ÑVˆLà—~’~Øð	
à)Ø Ø!Ø"7Ø"7Ø1ñ	
ð ñ	
ˆð Ñ#Ø-9Ð)Ñ*àÐr   )r   )N)úspeaker_embeddings_path.json)r   r   F)T)NNÚpté   FTF)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__rn   r   Úclassmethodr6   ÚboolrL   Ústaticmethodrm   rH   rG   rŠ   rq   Úpropertyru   rF   r~   r   r   rŽ   Ú__static_attributes__Ú__classcell__)r   s   @r   r   r   "   s  ø† ð ØØñ€L÷5ð àMkó7Oó ð7Oðx &DØ%9Ø!ñ3Gð
 ÷3Gð 3Gðj ðP¨ð P¸3ð PÐQTó Pó ðPñ$&!¨s°T©zõ &!ñP	j¸¸t¹õ 	jð ð¨ó ó ðñ>¸Tõ >ð. ð ØØØØ Ø"Ø#ñ7ð 
ô7ó ö7r   r   )Ú__doc__r4   r0   ÚnumpyrI   Úfeature_extraction_utilsr   Úprocessing_utilsr   Útokenization_utils_baser   Úutilsr   r   Ú	utils.hubr	   Úautor   Ú
get_loggerr“   r.   r   Ú__all__© r   r   Ú<module>r¨      sb   ðñó Û 	ã å 4Ý .Ý 4ß ,Ý $Ý  ð 
×	Ò	˜HÓ	%€ð ôhNó hó ðhðV	 Ð
r   