
    0j!                     v    S SK r S SKrS SKrS SKJrJr  SSKJrJr  SSK	J
r
  SSKJr  SSKJr   " S S	\5      rg)
    N)zipxrange   )	ErrorCodeLangDetectException)Language)NGram)unicode_blockc                       \ rS rSrSrSrSrSrSrSr	Sr
S	r\R                  " S
5      r\R                  " S5      rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r Sr!g)Detector   a  
Detector class is to detect language from specified text.
Its instance is able to be constructed via the factory class DetectorFactory.

After appending a target text to the Detector instance with .append(string),
the detector provides the language detection results for target text via .detect() or .get_probabilities().

.detect() method returns a single language name which has the highest probability.
.get_probabilities() methods returns a list of multiple languages and their probabilities.

The detector has some parameters for language detection.
See set_alpha(double), .set_max_text_length(int) .set_prior_map(dict).

Example:

    from langdetect.detector_factory import DetectorFactory
    factory = DetectorFactory()
    factory.load_profile('/path/to/profile/directory')

    def detect(text):
        detector = factory.create()
        detector.append(text)
        return detector.detect()

    def detect_langs(text):
        detector = factory.create()
        detector.append(text)
        return detector.get_probabilities()
g      ?g?i  g?gwJ?'  unknownz'https?://[-_.?&~;+=/#0-9A-Za-z]{1,2076}z>[-_.0-9A-Za-z]{1,64}@[-_0-9A-Za-z]{1,255}[-_.0-9A-Za-z]{1,255}c                    UR                   U l         UR                  U l        UR                  U l        [        R                  " 5       U l        SU l        S U l        U R                  U l        SU l	        SU l
        S U l        SU l        g )N    r   F)word_lang_prob_maplanglistseedrandomRandomtextlangprobALPHA_DEFAULTalphan_trialmax_text_length	prior_mapverbose)selffactorys     M/home/wildlama/miniconda3/lib/python3.13/site-packages/langdetect/detector.py__init__Detector.__init__8   sm    ")"<"<((LL	mmo	''
$    c                     SU l         g )NT)r   r    s    r"   set_verboseDetector.set_verboseF   s	    r%   c                     Xl         g N)r   )r    r   s     r"   	set_alphaDetector.set_alphaI   s    
r%   c                    S/[        U R                  5      -  U l        Sn[        [        U R                  5      5       HO  nU R                  U   nXA;   d  M  X   nUS:  a  [	        [
        R                  S5      eXPR                  U'   X%-  nMQ     US::  a  [	        [
        R                  S5      e[        [        U R                  5      5       H  nU R                  U==   U-  ss'   M     g)z3Set prior information about language probabilities.        r   z'Prior probability must be non-negative.z/More one of prior probability must be non-zero.N)lenr   r   r   r   r   InitParamError)r    r   sumpilangps         r"   set_prior_mapDetector.set_prior_mapL   s    T]]!33DNN+,A==#D Oq5-i.F.FHqrr$%q!	 - 3;%i&>&>@qrrDNN+,ANN1% -r%   c                     Xl         g)zaSpecify max size of target text to use for language detection.
The default value is 10000(10KB).
N)r   )r    r   s     r"   set_max_text_lengthDetector.set_max_text_length]   s
      /r%   c                 P   U R                   R                  SU5      nU R                  R                  SU5      n[        R                  " U5      nSn[        [        [        U5      U R                  5      5       H*  nX   nUS:w  d  US:w  a  U =R                  U-  sl	        UnM,     g)zAppend the target text for language detection.
If the total size of target text exceeds the limit size specified by
Detector.set_max_text_length(int), the rest is cut down.
 r   N)
URL_REsubMAIL_REr	   normalize_vir   minr0   r   r   )r    r   prer3   chs        r"   appendDetector.appendc   s    
 {{sD)||T*!!$'CIt';';<=ABSyC3J		R	C	 >r%   c                 2   Su  pU R                    HL  nSUs=::  a  S::  a
  O  OUS-  nM  U[        R                  " S5      :  d  M6  [        U5      S:w  d  MG  US-  nMN     US-  U:  a.  SnU R                    H  nUS:  d  SU:  d  M  XC-  nM     X@l         g	g	)
zsCleaning text to detect
(eliminate URL, e-mail address and Latin sentence if it is not written in Latin alphabet).
)r   r   Azr   u   ̀zLatin Extended Additional   r   N)r   sixur
   )r    latin_countnon_latin_countrC   text_without_latins        r"   cleaning_textDetector.cleaning_textr   s     (,$))BbCq suuX&=+<@[+[1$	  ?_,!#ii8sRx&,&   +I -r%   c                 f    U R                  5       nU(       a  US   R                  $ U R                  $ )zcDetect language of the target text and return the language name
which has the highest probability.
r   )get_probabilitiesr4   UNKNOWN_LANG)r    probabilitiess     r"   detectDetector.detect   s2     ..0 #(((   r%   c                 r    U R                   c  U R                  5         U R                  U R                   5      $ r+   )r   _detect_block_sort_probabilityr'   s    r"   rR   Detector.get_probabilities   s-    ==  %%dmm44r%   c                 &   U R                  5         U R                  5       nU(       d  [        [        R                  S5      eS/[        U R                  5      -  U l        U R                  R                  U R                  5        [        U R                  5       GHs  nU R                  5       nU R                  U R                  R                  SS5      U R                  -  -   nSn U R!                  X0R                  R#                  U5      U5        US-  S:X  af  U R%                  U5      U R&                  :  d  XPR(                  :  a  O>U R*                  (       a&  [,        R.                  " SU R1                  U5      5        US-  nM  [        [        U R                  5      5       H)  nU R                  U==   X6   U R                  -  -  ss'   M+     U R*                  (       d  GMM  [,        R.                  " SU R1                  U5      5        GMv     g )	NzNo features in text.r/         ?r      >r   z==>)rO   _extract_ngramsr   r   CantDetectErrorr0   r   r   r   r   r   r   _init_probabilityr   gaussALPHA_WIDTH_update_lang_probchoice_normalize_probCONV_THRESHOLDITERATION_LIMITr   rJ   print_rY   )r    ngramstprobr   r3   js          r"   rX   Detector._detect_block   s   %%'%i&?&?AWXXDMM 22#%A))+DJJ!2!23!<t?O?O!OOEA&&t[[-?-?-GOq5A:++D1D4G4GG1PdPdKd||

3(>(>t(DEQ  C./a DGdll$::  0|||

5$"8"8">?! &r%   c                     U R                   b  [        U R                   5      $ S[        U R                  5      -  /[        U R                  5      -  $ )zjInitialize the map of language probabilities.
If there is the specified prior map, use it as initial map.
r\   )r   listr0   r   r'   s    r"   ra   Detector._init_probability   sB     >>%''#dmm,,-DMM0BBBr%   c                    [        [        S[        R                  S-   5      5      n/ n[        5       nU R                   H  nUR                  U5        UR                  (       a  M'  U Hb  n[        UR                  5      U:  a    MH  UR                  U* S nU(       d  M8  US:w  d  M@  X`R                  ;   d  MQ  UR                  U5        Md     M     U$ )z!Extract n-grams from target text.r   Nr<   )rp   r   r	   N_GRAMr   add_charcapitalwordr0   gramsr   rD   )r    RANGEresultngramrC   nws          r"   r_   Detector._extract_ngrams   s    VAu||a/01))BNN2  u{{#a'KK$1ca+B+B&BMM!$ 	  r%   c           	      \   Ub  X R                   ;  a  gU R                   U   nU R                  (       a?  [        R                  " U< SU R	                  U5      < SU R                  U5      < 35        X0R                  -  n[        [        U5      5       H  nX==   XTU   -   -  ss'   M     g)z:Update language probabilities with N-gram string(N=1,2,3).F(z): T)	r   r   rJ   ri   _unicode_encode_word_prob_to_string	BASE_FREQr   r0   )r    rl   wordr   lang_prob_mapweightr3   s          r"   rd   Detector._update_lang_prob   s    <4'>'>>//5<<JJtT-A-A$-GIbIbcpIqrs'D	"AGva 000G #r%   c                     Sn[        [        U5      5       H&  nX   nUS:  d  M  USU R                  U   U4-  -  nM(     U$ )Nr   gh㈵>z %s:%.5f)r   r0   r   )r    rl   rx   rm   r5   s        r"   r   Detector._word_prob_to_string   sL    D	"AAG|*a(8!'<<< # r%   c                 x    S[        U5      p2[        [        U5      5       H  nX   U-  nX%:  a  UnXQU'   M     U$ )zRNormalize probabilities and check convergence by the maximun probability.
        r/   )sumr   r0   )r    rl   maxpr2   r3   r5   s         r"   rf   Detector._normalize_prob   sE     #d)dD	"A$AxG	 #
 r%   c                     [        U R                  U5       VVs/ s H!  u  p#X0R                  :  d  M  [        X#5      PM#     nnnUR	                  SS9  U$ s  snnf )NT)reverse)r   r   PROB_THRESHOLDr   sort)r    rl   r4   r5   rx   s        r"   rY   Detector._sort_probability   sR    585Mi5M	QRUhUhQh#(4#5MiD! js
   AAc                     SnU Hm  nU[         R                  " S5      :  aL  [        S[        U5      -   5      SS  n[	        U5      S:  a  SU-   n[	        U5      S:  a  M  USUSS	 -   -  nMi  X#-  nMo     U$ )
Nr      i   rI      0z\ur   r]   )rJ   rK   hexordr0   )r    r   bufrC   sts        r"   r   Detector._unicode_encode   s~    BSUU8_$3r7*+AB/"gkrB "gkur!Aw&	  
r%   )r   r   r   r   r   r   r   r   r   r   r   N)"__name__
__module____qualname____firstlineno____doc__r   rc   rh   r   rg   r   rS   recompiler=   r?   r#   r(   r,   r6   r9   rD   rO   rU   rR   rX   ra   r_   rd   r   rf   rY   r   __static_attributes__ r%   r"   r   r      s    < MKONNILZZBCFjjZ[G&"/+$!5
@6C&	

r%   r   )r   r   rJ   	six.movesr   r   lang_detect_exceptionr   r   languager   utils.ngramr	   utils.unicode_blockr
   objectr   r   r%   r"   <module>r      s+     	 
 ! A   .lv lr%   