
    
3jr              
          S SK JrJrJr  S SKrS SKrS SKJr  S SKJ	r	  \(       a  S SK
JrJr  SSKJr  SSKJr  S	r " S
 S\5      r " S S\5      r\\\   -  r " S S5      r " S S\5      r " S S5      r " S S\5      r " S S5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S  S!\5      r " S" S#5      r " S$ S%5      r \\\\\\\\\\ /
r!g)&    )TYPE_CHECKINGAny	TypedDictN)get_torch_device)snapshot_downloadWhisperForConditionalGenerationWhisperProcessor   )log)get_model_path>  c                   B    \ rS rSr% Sr\\S'   \R                  \S'   Sr	g)AudioTensor   z%Comfy's representation of AUDIO data.sample_ratewaveform N)
__name__
__module____qualname____firstlineno____doc__int__annotations__torchTensor__static_attributes__r       B/home/wildlama/comfy/ComfyUI/custom_nodes/comfy_mtb/nodes/audio.pyr   r      s    /llr   r   c                   H    \ rS rSr% Sr\\S'   \\\\	4      \S'   \\S'   Sr
g)WhisperData   z<Whisper transcription data with timestamps and speaker info.textchunkslanguager   N)r   r   r   r   r   strr   listdictr   r   r   r   r    r"   r"      s"    F
Ic3h  Mr   r"   c                       \ rS rSrSr\S\S\4S j5       r\	S\
S\S\
4S j5       r\	S\
S\
4S	 j5       r\S\\
   S\\\
   \\4   4S
 j5       rSrg)MtbAudio&   z Base class for audio processing.audiosreturnc                    ^  [        U[        5      (       a  [        U 4S jU 5       5      $ US   R                  S   S:H  $ )Nc              3   F   >#    U  H  nTR                  U5      v   M     g 7fN)	is_stereo).0audioclss     r    	<genexpr>%MtbAudio.is_stereo.<locals>.<genexpr>/   s     @s}}U++s   !r      r   )
isinstancer(   anyshape)r5   r-   s   ` r    r2   MtbAudio.is_stereo)   s>    
 fd##@@@@*%++A.!33r   r4   common_sample_ratec                     U S   nX!:w  aG  [         R                  " SU SU 35        [        R                  R	                  X!S9nUU" U S   5      S.$ U $ )Nr   zResampling audio from z to )	orig_freqnew_freqr   r   r   )r   debug
torchaudio
transformsResample)r4   r=   current_rate	resamplers       r    resampleMtbAudio.resample3   ss    ]+-II(d;M:NO #--66& 7 I  2%eJ&78 
 Lr   c                 x    U S   R                   S   S:X  a#  U S   [        R                  " U S   U S   /SS9S.$ U $ )Nr   r8   r   dimrA   )r;   r   cat)r4   s    r    	to_stereoMtbAudio.to_stereoD   sS    ""1%*$]3!II:&j(9:  Lr   c                    [        U Vs/ s H  o"S   PM	     sn5      nU Vs/ s H  o R                  X#5      PM     nnU R                  U5      nU(       a   U Vs/ s H  o R                  U5      PM     nnXU4$ s  snf s  snf s  snf )Nr   )maxrH   r2   rN   )r5   r-   r4   max_sample_rateresampled_audiosr2   s         r    preprocess_audiosMtbAudio.preprocess_audiosP   s     H]3HI ?E
>DULL0f 	 
 MM&)	8HI8HummE*8HFI?33 I
 Js   A;B Br   N)r   r   r   r   r   classmethod	AudioDataboolr2   staticmethodr   r   rH   rN   r(   tuplerT   r   r   r   r    r+   r+   &   s    *44 
4 4       	 	 	 	 4+&4	tK $+	,4 4r   r+   c                   .    \ rS rSr% SrS\S'   S\S'   Srg)	WhisperPipelinea   zWhisper model pipeline.r
   	processorr	   modelr   N)r   r   r   r   r   r   r   r   r   r    r\   r\   a   s    !!!,,r   r\   c                   B    \ rS rSrSr\S 5       rSrSrSr	Sr
SS jrS	rg
)MTB_LoadWhisperh   !Load Whisper model and processor.c                 *    S/ SQSS040SSSSS	.40S
.$ )N
model_size)	tinysmallmediumz	medium.enbaselargezlarge-v2zlarge-v3zlarge-v3-turbodefaultrf   download_missingBOOLEANFzSDownload missing models if missing,otherwise they must be in ComfyUI/models/whisper)rk   tooltiprequiredoptionalr   r5   s    r    INPUT_TYPESMTB_LoadWhisper.INPUT_TYPESk   sJ     
 '" ##(O	%#
 	
r   WHISPER_PIPELINE)pipeline	mtb/audioloadc                 j   SSK JnJn  [        S5      nSU 3nXV-  nUR	                  5       (       de  UR	                  5       (       dP  U(       d  [        S5      eUR                  SS9  UR                  SS9  [        SU 3S/ S	QUR                  5       S
S9  [        5       n[        R                  " SU SU SU 35        UR                  " UR                  5       5      n	UR                  " UR                  5       5      R                  U5      n
U
R                  5         U
R                  S
5        XS.4$ )rc   r   r   whisperzwhisper-z+Models not found and download_missing=FalseT)exist_okzopenai/)z	*.msgpackz*.binz*.h5F)repo_idresume_downloadignore_patterns	local_dirlocal_dir_use_symlinkszLoading Whisper model z on z from )r^   r_   )transformersr	   r
   r   existsRuntimeErrormkdirr   as_posixr   r   rB   from_pretrainedtoevalrequires_grad_)selfre   rl   r	   r
   whisper_dirtag	model_dirdevicer^   r_   s              r    ry   MTB_LoadWhisper.load   s2   	

 %Y/%%	""$$	(8(8(:(:#"A  !!4!0.!%cUO$($B'002+0 "#		$ZLVHF9+N	
 %44Y5G5G5IJ	/?? 

"V* 	 	

U#'8::r   r   N)rf   F)r   r   r   r   r   rV   rs   RETURN_TYPESRETURN_NAMESCATEGORYFUNCTIONry   r   r   r   r    ra   ra   h   s3    +
 
@ )L LHH);r   ra   c                   N    \ rS rSrSr\S 5       rSrSrSr	  SS\
S\4S	 jjrS
rg)MTB_AudioToText   'Transcribe audio to text using Whisper.c                 B    SSSS/[        / SQ5      -   SS04SSS04S	.0$ )
Nrp   ru   AUDIOauto)enfresdeitptnlruzhjakork   rm   T)rw   r4   r&   return_timestamps)sortedrr   s    r    rs   MTB_AudioToText.INPUT_TYPES   sQ     1#H  '#& '0)T1B%C-
 	
r   )STRINGWHISPER_OUTPUT
transcriberx   rw   r4   c                 v	   US   nUS   nUR                   nU R                  U[        5      nUS   n[        R                  " SUR
                   35        [        UR
                  5      S:X  a  UR                  S5      n[        UR
                  5      S:X  aW  UR
                  S   S:X  a  UR                  SS9nO4UR
                  S	   S:X  a  UR                  S	S9nOUR                  S5      nUS
   n	Sn
X-  nUR
                  S   nX-  n[        R                  " SUS S35        / n/ n/ nSnSn[        SX5       GH  n[        UU-   U5      nUUU nUU	-  nUR                  U5        [        R                  " SUS SUU	-  S S35        UR                  R                  =(       d    Sn[        R                  " S	U45      nU" UU	SS9R                   R#                  U5      n[        R$                  " 5          UR'                  UUR#                  U5      SUS:X  a  SOUUSSSUS9	nSSS5        UR(                  R+                  WS   5      n/ nU H  nUR-                  S5      (       a  UR/                  S5      (       as   USS nUR1                  S S!5      R3                  5       (       a5  [5        U5      nUU:  a  UU-  nUU-   n UR                  SU S S35        UnM  UR                  U5        M  UR                  U5        M     UR9                  U5        UR;                  US"S#9S   n!UR                  U!5        GM     S$n"US:X  a   [        R                  " S%5        [        R$                  " 5          U" USU U	SS9R                   R#                  U5      n#UR=                  U#5      S   n$UR(                  R+                  U$R?                  S5      RA                  5       5      n%U%R-                  S5      (       a  U%SS OS$n"[        R                  " S&U" 35        SSS5        S(RG                  U5      n'U'U"UUUS).n(U'U(4$ ! , (       d  f       GN"= f! [6         a    UR                  U5         GM"  f = f! , (       d  f       N`= f! [B         a#  n&[        RD                  " S'U& 35         Sn&A&NSn&A&ff = f)*r   r^   r_   r   zProcessed waveform shape:    r   r   rK   r8   r      Audio duration: .2fs        zProcessing chunk z.1fzs - i  r   )sampling_ratereturn_tensorsr   r   N         ?)attention_masktaskr&   r   no_repeat_ngram_size	num_beamslength_penalty
max_length<||>. T)skip_special_tokensr   zDetecting languagezDetected language: zLanguage detection failed:  )r$   r&   tokensr4   chunk_offsets)$r   rH   WHISPER_SAMPLE_RATEr   rB   r;   lensqueezemeanrangeminappendconfigr   r   onesinput_featuresr   no_gradgenerate	tokenizerconvert_ids_to_tokens
startswithendswithreplaceisdigitfloat
ValueErrorextendbatch_decodedetect_languageargmaxitem	Exceptionwarningjoin))r   rw   r4   r&   r   r^   r_   r   r   r   chunk_durationchunk_samplestotal_samplestotal_duration
all_tokensall_textr   	last_timeaccumulated_offsetchunk_start	chunk_endchunk_waveformchunk_offsetr   r   r   predicted_idschunk_tokensadjusted_tokenstokentime_strtime_valadjusted_time
chunk_textdetected_languagefirst_chunk_featurespredicted_probslanguage_tokenefull_transcriptionwhisper_outputs)                                            r    r   MTB_AudioToText.transcribe   s    [)	!e%89$		.x~~.>?@ x~~!#''*Hx~~!#~~a A%#==Q=/"a'#==Q=/#++A.M*&4 r*&4		$^C$8:;
	  MAKK-7GI%k)<N&4L  .II#L#5T)k:QRU9VVWX 007CJ"ZZJ8N&)# nRRZ	   %"#1#4#4V#<%%-%7TX&7)*#&) !/ 
! ! %..DDa L !O%##D))ennT.B.B6#(2;#++C4<<>>',XH  ()3 2i ? 2,47I,IM+22Rc7J"3MN(0I+2259 $**51' &* o."//4 0 J OOJ'} B@ !v?		./]]_+4 -0&1'+, %nRRZ	 ) ',&;&;,''O &/%8%8%N%N'..r2779&N
 *44T:: 'q,! &
 II 34E3FGH% %. !XXh/ ') *
 ">11e !D & 6'..u556 %_(  ?9!=>>?s\   ;/QAQ#Q+R 9B'Q: R 
Q	Q76Q7:
RR R 
R8R33R8r   N)r   T)r   r   r   r   r   rV   rs   r   r   r   r\   r   r   r   r   r   r    r   r      sP    1
 
8 0LHH R2!R2 R2 R2r   r   c                   >    \ rS rSrSr\S 5       rSrSrSr	S
S jr
Srg	)MTB_ProcessWhisperOutputiv  /Process Whisper output into timestamped chunks.c                     SSSSSSSS.4S.0$ )	Nrp   )r   FLOATr   g      $@g?rk   r   rQ   step)r   min_chunk_lengthr   rr   s    r    rs   $MTB_ProcessWhisperOutput.INPUT_TYPESy  s-     "5 #CcJ%
 	
r   )r   WHISPER_CHUNKSprocessrx   c                    US   nUS   n/ nUS   R                   S   US   -  n[        R                  " SUS S35        [        U5       H  u  pxUR	                  S	5      (       d  M  UR                  S
5      (       d  M5   USS n	U	R                  SS5      R                  5       (       aN  [        U	5      n
SU
s=::  a  U::  a4  O  M|  UR                  Xz45        [        R                  " SU SU
 35        M  M  M     / n[        U5      S:  a  [        [        U5      S-
  5       Hu  nXW   u  pXWS-      u  pX-
  U:  a  M  X<S-   U nSR                  S U 5       5      nUR                  5       (       d  MR  UR                  UR                  5       X/S.5        Mw     U(       a  US   u  pU[        U5      S-
  :  a  SR                  S X<S-   S  5       5      nUR                  5       (       a_  U(       a&  US   nUS   S   US   S   -
  n[        UU-   U5      nOUnX:  a+  X-
  U:  a#  UR                  UR                  5       X/S.5        US   UUS   S.nUS   U4$ ! [         a     GM%  f = f)r  r   r4   r   r   r   r   r   r   r   r   r   r   r   r   r   zToken z: r8   r   c              3      #    U  H5  nUR                  S 5      (       a  UR                  S5      (       a  M1  Uv   M7     g7fr   r   Nr   r   r3   ts     r    r6   3MTB_ProcessWhisperOutput.process.<locals>.<genexpr>  s3       )LL..1::d3C A)   0?	?)r$   	timestampc              3      #    U  H5  nUR                  S 5      (       a  UR                  S5      (       a  M1  Uv   M7     g7fr  r  r  s     r    r6   r    s3       4LL..1::d3C A4r  Nr  r$   r&   )r$   r%   r&   )r;   r   rB   	enumerater   r   r   r   r   r   r   r   r   r   stripr   )r   r   r  r   r4   timestamp_tokensaudio_durationir   r   r   r%   	start_pos
start_timeend_posend_timer   r$   
prev_chunkprev_durationresults                        r    r   MTB_ProcessWhisperOutput.process  s   )w'z*004u]7KK		$^C$8:;!&)HA%%%..*>*>$Qr{H''R088::#(?:N:,33QMBIIqcH:&>? ; ;	 *  1$3/0145(8(;%	$4U$;!(+;;%!mg>xx  )   ::<<MM$(JJL*4)? 6, $4R$8!I3v;?*xx  #MO4  
 ::<<%+BZ
&{3A6(5a89 & $'&6$ $2 !-$15EE(,

.8-C #6*&z2
 f%v--C " s   AI".I""
I10I1r   N)r   )r   r   r   r   r   rV   rs   r   r   r   r  r   r   r   r    r  r  v  s.    9	
 	
 0LHHS.r   r  c                   N    \ rS rSrSr\S 5       rSrSrSr	Sr
S\S	\S
\4S jrSrg)MTB_AudioCuti  z%Basic audio cutter, values are in ms.c           	      .    SSSSSSSS.4SSSSSS.4S	.0$ )
Nrp   r   r       @@r   g    ~.Ar8   r  )r4   lengthoffsetr   rr   s    r    rs   MTB_AudioCut.INPUT_TYPES  sH     ##)"' !	  #C!L
 	
r   r   )	cut_audiorx   cutr4   r'  r(  c                     US   n[        X4-  S-  5      n[        U[        X$-  S-  5      -   US   R                  S   5      nUS   S S 2S S 2XV24   nUUS.4$ )Nr     r   r   rA   )r   r   r;   )r   r4   r'  r(  r   	start_idxend_idxcut_waveforms           r    r+  MTB_AudioCut.cut  s    M*,t34	F04788*##B'
 Z(Ay/@)@A  +(
 	
r   r   N)r   r   r   r   r   rV   rs   r   r   r   r   r   r   r+  r   r   r   r    r$  r$    sG    /
 
( L!LHH
 
e 
U 
r   r$  c                   P    \ rS rSrSr\S 5       rSrSrSr	Sr
S\S	\\   4S
 jrSrg)MTB_AudioStacki  zStack/Overlay audio inputs (dynamic inputs).
- pad audios to the longest inputs.
- resample audios to the highest sample rate in the inputs.
- convert them all to stereo if one of the inputs is.
c                 
    S0 0$ )Nrp   r   rr   s    r    rs   MTB_AudioStack.INPUT_TYPES  s    Br   r   )stacked_audiorx   stackkwargsr.   c                    U R                  [        UR                  5       5      5      u  p#n[        U Vs/ s H  oUS   R                  S   PM     sn5      n/ nU Hb  n[
        R                  " SU(       a  SOSXeS   R                  S   -
  45      n[
        R                  " US   U/SS9n	UR                  U	5        Md     [
        R                  " USS9R                  SS9n
UU
S.4$ s  snf )Nr   r   r8   r   rK   r   rA   )rT   r(   valuesrQ   r;   r   zerosrM   r   r7  sum)r   r8  r-   r2   max_rater4   r   padded_audiospaddingpadded_audiostacked_waveforms              r    r7  MTB_AudioStack.stack  s    &*&<&<!'
#8 6J6%
+11"56JK
,.Ekk"Az!2!8!8!<<G !99eJ&7%ArJL  .  !;;}!<@@Q@G  (,
 	
! Ks   C)r   N)r   r   r   r   r   rV   rs   r   r   r   r   r   rZ   r7  r   r   r   r    r3  r3    sI         L%LHH
k 
eK.@ 
r   r3  c                   J    \ rS rSrSr\S 5       rSrSrSr	Sr
S\S	\4S
 jrSrg)MTB_AudioSequencei:  a"  Sequence audio inputs (dynamic inputs).
- adding silence_duration between each segment
  can now also be negative to overlap the clips, safely bound
  to the the input length.
- resample audios to the highest sample rate in the inputs.
- convert them all to stereo if one of the inputs is.
c                     SSSSSSSS.400$ )	Nrp   silence_durationr  r   g     8i  g{Gz?r  r   rr   s    r    rs   MTB_AudioSequence.INPUT_TYPESC  s-     " #F3M%
 	
r   r   )sequenced_audiorx   sequencerF  r8  c           	         U R                  [        UR                  5       5      5      u  p4n/ n[        U5       GH  u  pxUS:  a  US:  a?  [        R
                  " SU(       a  SOS[        X-  5      45      n	UR                  U	5        OUS:  a  [        [        U5      U-  5      n
US   n[        U
UR                  S   US   R                  S   5      n
U
S:  aX  US S 2S S 2U
* S 24   US   S S 2S S 2S U
24   -   nUS S 2S S 2S U
* 24   US'   UR                  U5        US   S S 2S S 2U
S 24   US'   UR                  US   5        GM     [        R                  " USS9nUUS.4$ )Nr   r8   r   r   r   rK   rA   )rT   r(   r:  r  r   r;  r   r   absr   r;   rM   )r   rF  r8  r-   r2   r=  rI  r  r4   silenceoverlapprevious_audiooverlap_partsequenced_waveforms                 r    rI  MTB_AudioSequence.sequenceS  s   &*&<&<!'
#8 (*!&)HA1u#a'#kk!*A 0 ;<G OOG,%)!#&6"7("BCG%-b\N!&,,R0j)//3G
 {*1a'?;#J/1hwh?@ % (6aIgXIo'F 5,1*,=aGHn,Mj)OOE*-.9 *< #YYxR8  (.
 	
r   r   N)r   r   r   r   r   rV   rs   r   r   r   r   r   r   rI  r   r   r   r    rD  rD  :  sD     
 
 L'LHH*
 *
+ *
r   rD  c                   T    \ rS rSrSr\S 5       rSrSrSr	Sr
S\S	\S
\\   4S jrSrg)MTB_AudioResamplei  z*Resample audio to a different sample rate.c           	           SSSSSSSSS	.4S
.0$ )Nrp   r   INTr   r-  i  d   z1Target sample rate in Hz. Whisper requires 16000.rk   r   rQ   r  rn   )r4   r   r   rr   s    r    rs   MTB_AudioResample.INPUT_TYPES  s5     ##(#% ##V	 
 	
r   r   )resampled_audiorx   resample_audior4   r   r.   c                 *    U R                  X5      nU4$ r1   )rH   )r   r4   r   	resampleds       r    rZ   MTB_AudioResample.resample_audio  s     MM%5	|r   r   N)r   r   r   r   r   rV   rs   r   r   r   r   r   r   rZ   rZ  r   r   r   r    rS  rS    sO    4
 
" L'LHH /2	{	r   rS  c                   h    \ rS rSrSr\S 5       rSrSrSr	Sr
  SS\S	\S
\S\S\S\\   4S jjrSrg)MTB_AudioIsolateSpeakeri  z3Isolate or mute specific speakers using WhisperDatac                 :    SSSSSS04SS/SS04S	S
SSSSS.4S.0$ )Nrp   r   r
  r   rk   
SPEAKER_00isolatemuter        Y@r   r&  
   z-Fade duration in milliseconds to avoid clicksrW  )r4   whisper_datatarget_speakermodefade_msr   rr   s    r    rs   #MTB_AudioIsolateSpeaker.INPUT_TYPES  sW     # 3#+i-F"G#V,y).DE#("% "#R	
 	
r   r   )processed_audiorx   process_audior4   rg  rh  ri  rj  r.   c                 .   [        US-  US   -  5      nUS:X  a  [        R                  " US   5      O[        R                  " US   5      nUS    Hx  nUR	                  S5      (       d  M  X8S   ;   n	US:X  a  U	(       d  US:X  d  M7  U	(       a  M@  [        US   S	   US   -  5      n
[        US   S
   US   -  5      nSUS S 2X24'   Mz     US	:  a  [        R
                  " S	S
U5      n[        R                  " US	S
S 24   US	S S24   :g  5      S	   S
-   nU Hg  nX:  d  M
  XR                  S
   U-
  ::  d  M!  US	U4   S
:X  a  US S 2XU-   24==   U-  ss'   MC  US S 2X-
  U24==   UR                  S	5      -  ss'   Mi     US   U-  nUS   US.4$ )Nr&  r   rc  r   r%   speakerrd  r  r   r8   r   r   rA   )	r   r   
zeros_like	ones_likegetlinspacewherer;   flip)r   r4   rg  rh  ri  rj  fade_samplesmaskchunkspeaker_presentstart_sample
end_samplefadetransitions	trans_idxprocessed_waveforms                   r    rm  %MTB_AudioIsolateSpeaker.process_audio  s    Gf,m0DDE y  U:./z!23 	 "(+E99Y'',i0@@O	!o"+&q)E-,@@  !{!3A!6}9M!MN
36Q//0 , !>>!Q5D++d1ab5kT!SbS&\&AB1EIK(	-!ZZ]\%AAAyL)Q.Q	,D DDEMEQ	 89 DDE IIaLE ) #:.5  %]3.
 	
r   r   N)rc  re  )r   r   r   r   r   rV   rs   r   r   r   r   r   r"   r'   r   rZ   rm  r   r   r   r    r_  r_    s~    =
 
( L'LHH 7
7
 "7
 	7

 7
 7
 
{	7
 7
r   r_  c                   P    \ rS rSrSr\S 5       rSrSrSr	S r
S r   SS	 jrS
rg)MTB_ProcessWhisperDiarizationi  zNProcess Whisper chunks with speaker diarization using either pyannote or NeMo.c           	      @    SSSS/SS04SSSS	SS
.4S.SSS/SS040S.$ )Nra  r   pyannotenemork   rU  r   r8   rf  r  )whisper_chunksr4   backendnum_speakersr   cudacpuro   r   rr   s    r    rs   )MTB_ProcessWhisperDiarization.INPUT_TYPES  sY     #6#'09j2IJ !!BB!	 FE?Y,?@
 	
r   ra  r  rx   c                     SSK Jn  SSKJn  UR                  SSS9nUR                  [        R                  " U5      5        U" 5        nU" US   S   US	   S
.UUS9nSSS5        / n	WR                  SS9 H/  u  pnU	R                  U
R                  U
R                  US.5        M1     U	$ ! [         a    [	        S5      ef = f! , (       d  f       Nm= f)z%Process audio using pyannote backend.r   )Pipeline)ProgressHookzBpyannote.audio not found. Install with: pip install pyannote.audioz pyannote/speaker-diarization-3.1N)use_auth_tokenr   r   )r   r   )r  hookT)yield_labelstartendro  )pyannote.audior  #pyannote.audio.pipelines.utils.hookr  ImportErrorr   r   r   r   
itertracksr   r  r  )r   r4   r  r   r  r  rw   r  diarizationspeaker_segmentsturn_ro  s                r    process_pyannote.MTB_ProcessWhisperDiarization.process_pyannote  s    	/H ++.t , 
 	ELL()^t" %j 1! 4#(#7 *K   + 6 64 6 HDW##!ZZ88& !I  ;  	T 	 ^s   B- 
C-C
Cc                 @    SSK Js  Jn  UR                  R
                  R                  S5      R                  U5      nUR                  US   S   US   US9n/ nU H$  nUR                  US   US	   S
US    3S.5        M&     U$ ! [         a    [        S5      ef = f)z!Process audio using NeMo backend.r   Nz;NeMo not found. Install with: pip install nemo_toolkit[asr]z+nvidia/speakerverification_en_titanet_larger   r   )r4   r   r  r  r  SPEAKER_ro  r  )
nemo.collections.asrcollectionsasrr  modelsClusteringDiarizerr   r   diarizer   )	r   r4   r  r   nemo_asrr_   r  r  segments	            r    process_nemo*MTB_ProcessWhisperDiarization.process_nemo4  s    	33 22BB9

"V* 	 mm
#A&m,% $ 
 "G##$W-"5>!)')*<)=> #  3  	M 	s   	B Bc                 <   US:X  a  U R                  X$U5      nOU R                  X$U5      nUS    Hf  nUS   u  p[        5       n
U H-  nUS   U	::  d  M  US   U:  d  M  U
R                  US   5        M/     U
(       a  [	        U
5      S   US'   Ma  SUS'   Mh     U4$ )	Nr  r%   r  r  r  ro  r   unknown)r  r  setaddr(   )r   r  r4   r  r  r   r  rx  r   r   chunk_speakersr  s               r    r  %MTB_ProcessWhisperDiarization.processS  s     j #44V   $00fM#H-E%*;%7"K UN+G$	1+5"&&wy'9: , #'#7#:i #,i  .   r   r   N)r  r   r  )r   r   r   r   r   rV   rs   r   r   r   r  r  r  r   r   r   r    r  r    sD    X
 
  'LHH" H F !r   r  c                   >    \ rS rSrSr\S 5       rSrSrSr	Sr
S rS	rg
)MTB_AudioDurationit  z#Get audio duration in milliseconds.c                     SSS00$ )Nrp   r4   r   r   rr   s    r    rs   MTB_AudioDuration.INPUT_TYPESw  s     
 	
r   )rU  )duration_msget_durationrx   c                     US   nUS   n[        UR                  S   U-  S-  5      n[        R                  " SU SUS-  S S35        U4$ )	Nr   r   r   r-  r   zms (r   zs))r   r;   r   rB   )r   r4   r   r   r  s        r    r  MTB_AudioDuration.get_duration  sc    $M*8>>"-;tCD		{m4d0B3/GrJ	
 ~r   r   N)r   r   r   r   r   rV   rs   r   r   r   r   r  r   r   r   r    r  r  t  s2    -
 
 L#LHH	r   r  )"typingr   r   r   r   rC   comfy.model_managementr   huggingface_hubr   r   r	   r
   r   utilsr   r   r   r"   r(   rW   r+   r\   ra   r   r  r$  r3  rD  rS  r_  r  r  	__nodes__r   r   r    <module>r     s   0 0   3 -
  " ) )  ${++	84 84v-i -R; R;jv2h v2rf. f.R+
8 +
\*
X *
ZC
 C
L BT
h T
ny! y!x : !	r   