o
    ‘Ó*jÌR  ã                   @   sÞ   d Z ddlZddlZddlmZ dgZe d¡Ze d¡Ze d¡Z	e d¡Z
e d	¡Ze d
¡Ze d¡Ze d¡Ze d¡Ze d¡Ze dej¡Ze dej¡Ze dej¡Ze d¡Ze d¡ZG dd„ dejƒZdS )zA parser for HTML and XHTML.é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]ú>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   s   e Zd ZdZdZdZdddœdd„Zd	d
„ Zdd„ Zdd„ Z	dZ
dd„ Zddœdd„Zdd„ Zd>dd„Zdd„ Zdd„ Zd>dd„Zd?d d!„Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2d3„ Zd4d5„ Zd6d7„ Zd8d9„ Zd:d;„ Zd<d=„ Z dS )@r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptÚstyleZxmpZiframeZnoembedZnoframes)ZtextareaÚtitleTF)Úconvert_charrefsÚ	scriptingc                C   s   || _ || _|  ¡  dS )az  Initialize and reset this instance.

        If convert_charrefs is true (the default), all character references
        are automatically converted to the corresponding Unicode characters.

        If *scripting* is false (the default), the content of the
        ``noscript`` element is parsed normally; if it's true,
        it's returned as is without being parsed.
        N)r   r   Úreset)Úselfr   r   © r   á  /home/task_178118931039656/croot/python-split_1781190656190/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehol/lib/python3.10/html/parser.pyÚ__init__v   s   
zHTMLParser.__init__c                 C   s4   d| _ d| _t| _d| _d| _d| _tj 	| ¡ dS )z1Reset this instance.  Loses all unprocessed data.Ú z???NT)
ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_support_cdataÚ
_escapableÚ_markupbaseÚ
ParserBaser	   ©r
   r   r   r   r	   „   s   zHTMLParser.resetc                 C   s   | j | | _ |  d¡ dS )z‘Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   Úgoahead©r
   Údatar   r   r   ÚfeedŽ   s   zHTMLParser.feedc                 C   s   |   d¡ dS )zHandle any buffered data.é   N)r   r   r   r   r   Úclose—   s   zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)Ú_HTMLParser__starttag_textr   r   r   r   Úget_starttag_text   s   zHTMLParser.get_starttag_text©Ú	escapablec                C   st   |  ¡ | _|| _| jdkrt d¡| _d S |r*| js*t d| j tjtjB ¡| _d S t d| j tjtjB ¡| _d S )NÚ	plaintextz\Zz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	Úlowerr   r   ÚreÚcompiler   r   Ú
IGNORECASEÚASCII)r
   Úelemr"   r   r   r   Úset_cdata_mode¡   s   




ÿ

ÿzHTMLParser.set_cdata_modec                 C   s   t | _d | _d| _d S )NT)r   r   r   r   r   r   r   r   Úclear_cdata_mode­   s   
zHTMLParser.clear_cdata_modec                 C   s
   || _ dS )a  Enable or disable support of the CDATA sections.
        If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
        If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

        This method is not called by default. Its purpose is to be called
        in custom handle_starttag() and handle_endtag() methods, with
        value that depends on the adjusted current node.
        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        for details.
        N)r   )r
   Úflagr   r   r   Ú_set_support_cdata²   s   
zHTMLParser._set_support_cdatac                 C   s  | j }d}t|ƒ}||k rY| jr;| js;| d|¡}|dk r:| dt||d ƒ¡}|dkr8t d¡ 	||¡s8n!|}n| j
 	||¡}|rI| ¡ }n| jrNn|}||k ro| jrf| jrf|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||kr{nÞ|j}|d|ƒrt ||¡r|  |¡}	n@|d|ƒr›|  |¡}	n5|d|ƒr¦|  |¡}	n*|d|ƒr±|  |¡}	n|d	|ƒr¼|  |¡}	n|d
 |k sÄ|rÎ|  d¡ |d
 }	nn‰|	dk r‰|sÙn€t ||¡ràn§|d|ƒr|d |krò|  d¡ n•t ||¡rùnŽ|  ||d d … ¡ n‚|d|ƒr0|}dD ]}
| |
|d ¡r"|t|
ƒ8 } nq|  ||d |… ¡ nW|d|ƒrF| jrF|  ||d d … ¡ nA|||d …  ¡ dkr_|  ||d d … ¡ n(|d	|ƒrq|  ||d d … ¡ n|d|ƒrƒ|  ||d d … ¡ nt dƒ‚|}	|  ||	¡}nÅ|d|ƒràt! ||¡}|rÃ| "¡ dd… }|  #|¡ | $¡ }	|d|	d
 ƒs¼|	d
 }	|  ||	¡}q	d||d … v rß|  |||d … ¡ |  ||d ¡}ny|d|ƒrQt% ||¡}|r| "d
¡}|  &|¡ | $¡ }	|d|	d
 ƒs	|	d
 }	|  ||	¡}q	t' ||¡}|r;|r:| "¡ ||d … kr:| $¡ }	|	|kr2|}	|  ||d
 ¡}n|d
 |k rP|  d¡ |  ||d
 ¡}nnJ dƒ‚||k s|r„||k r„| jru| jru|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\t\n\r\f ;]ú</ú<!--ú<?ú<!r   é   )z--!z--ú-é   ú	<![CDATA[é   é	   ú	<!doctypezwe should not get here!z&#éÿÿÿÿú;zinteresting.search() lied)(r   Úlenr   r   ÚfindÚrfindÚmaxr%   r&   Úsearchr   Ústartr   Úhandle_datar   Z	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚ
endtagopenÚhandle_commentÚendswithr   Úunknown_declr$   Úhandle_declÚ	handle_piÚAssertionErrorÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r
   rW   r   ÚiÚnÚjZampposrG   rE   ÚkÚsuffixÚnamer   r   r   r   Â   sè   
ÿ€







þ




…}zHTMLParser.goaheadc                 C   sp  | j }|||d … dksJ dƒ‚|||d … dkr |  |¡S |||d … dkrJ| jrJ| d|d ¡}|d	k r;d
S |  ||d |… ¡ |d S |||d …  ¡ dkrs| d|d ¡}|d
krdd
S |  ||d |… ¡ |d S |||d … dkr³| d|d ¡}|d	k r‹d
S ||d  dkr¤|  ||d |d … ¡ |d S |  ||d |… ¡ |d S |  |¡S )Nr5   r4   z+unexpected call to parse_html_declaration()r7   r2   r:   r8   z]]>r   r<   r9   r;   r   r   z<![ú])	r   rJ   r   r?   rP   r$   rQ   rN   Úparse_bogus_comment)r
   r[   r   r]   Zgtposr   r   r   rL   N  s4   
ÿ
z!HTMLParser.parse_html_declarationc                 C   sp   | j }| d|¡sJ dƒ‚t ||d ¡}|s#t ||d ¡}|s#dS |r4| ¡ }|  ||d |… ¡ | ¡ S )Nr2   ú"unexpected call to parse_comment()r7   r<   )	r   rE   ÚcommentcloserB   ÚcommentabruptcloserG   rC   rN   rW   )r
   r[   Úreportr   rG   r]   r   r   r   rJ   p  s   zHTMLParser.parse_commentr   c                 C   s`   | j }|||d … dv sJ dƒ‚| d|d ¡}|dkrdS |r,|  ||d |… ¡ |d S )Nr5   )r4   r1   rc   r   r<   r   )r   r?   rN   )r
   r[   rf   r   Úposr   r   r   rb     s   zHTMLParser.parse_bogus_commentc                 C   sd   | j }|||d … dksJ dƒ‚t ||d ¡}|sdS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr5   r3   zunexpected call to parse_pi()r<   )r   ÚpicloserB   rC   rR   rW   ©r
   r[   r   rG   r]   r   r   r   rK   ‹  s   zHTMLParser.parse_pic                 C   s  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}|s(J dƒ‚| ¡ }| d¡ ¡  | _}||k r–t	 ||¡}|sCnS| ddd¡\}	}
}|
sRd }n-|d d… d  krd|dd … ksyn |d d… d  krw|dd … krn n|dd… }|r…t
|ƒ}| |	 ¡ |f¡ | ¡ }||k s:|||…  ¡ }|d	vrÓ|  ¡ \}}d
| j v rÁ|| j  d
¡ }t| j ƒ| j  d
¡ }n|t| j ƒ }|  |||… ¡ |S | d¡rà|  ||¡ |S |  ||¡ || jv sö| jrò|dksö|dkrÿ| j|dd |S || jv r| j|dd |S )Nr   r   z#unexpected call to parse_starttag()r5   r9   ú'r<   ú")r   ú/>Ú
rl   Znoscriptr#   Fr!   T)r   Úcheck_for_whole_start_tagr   Útagfind_tolerantrG   rW   rU   r$   r   Úattrfind_tolerantr   ÚappendÚstripZgetposÚcountr>   r@   rD   rO   Úhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr   r*   ÚRCDATA_CONTENT_ELEMENTS)r
   r[   Úendposr   ÚattrsrG   r^   ÚtagÚmÚattrnameÚrestZ	attrvaluerW   ÚlinenoÚoffsetr   r   r   rH   —  sf   
&(ó

ÿ
	ù
ÿþzHTMLParser.parse_starttagc                 C   s>   | j }t ||d ¡}|sJ ‚| ¡ }||d  dkrdS |S )Nr   r   r<   )r   ÚlocatetagendrG   rW   ri   r   r   r   rn   Ð  s   z$HTMLParser.check_for_whole_start_tagc                 C   sà   | j }|||d … dksJ dƒ‚| d|d ¡dk rdS t ||¡s8||d |d … dkr3|d S |  |¡S t ||d ¡}|sDJ ‚| ¡ }||d  dkrRdS t ||d ¡}|s^J ‚| d¡ 	¡ }|  
|¡ |  ¡  |S )	Nr5   r1   zunexpected call to parse_endtagr   r   r<   r9   r   )r   r?   rM   rG   rb   r€   rW   ro   rU   r$   Úhandle_endtagr+   )r
   r[   r   rG   r]   rz   r   r   r   rI   Ú  s&   

zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S ©N)ru   r   ©r
   rz   ry   r   r   r   rt   ø  s   zHTMLParser.handle_startendtagc                 C   ó   d S r‚   r   rƒ   r   r   r   ru   ý  ó   zHTMLParser.handle_starttagc                 C   r„   r‚   r   )r
   rz   r   r   r   r     r…   zHTMLParser.handle_endtagc                 C   r„   r‚   r   ©r
   r`   r   r   r   rV     r…   zHTMLParser.handle_charrefc                 C   r„   r‚   r   r†   r   r   r   rY   	  r…   zHTMLParser.handle_entityrefc                 C   r„   r‚   r   r   r   r   r   rD     r…   zHTMLParser.handle_datac                 C   r„   r‚   r   r   r   r   r   rN     r…   zHTMLParser.handle_commentc                 C   r„   r‚   r   )r
   Zdeclr   r   r   rQ     r…   zHTMLParser.handle_declc                 C   r„   r‚   r   r   r   r   r   rR     r…   zHTMLParser.handle_pic                 C   r„   r‚   r   r   r   r   r   rP     r…   zHTMLParser.unknown_decl)T)r   )!Ú__name__Ú
__module__Ú__qualname__Ú__doc__rv   rw   r   r	   r   r   r   r    r*   r+   r-   r   rL   rJ   rb   rK   rH   rn   rI   rt   ru   r   rV   rY   rD   rN   rQ   rR   rP   r   r   r   r   r   Z   s@    
	
 
"
9
)rŠ   r%   r   Zhtmlr   Ú__all__r&   r   rZ   rX   rT   rF   rM   rh   rd   re   ro   ÚVERBOSErp   r€   Zlocatestarttagend_tolerantZ	endendtagZ
endtagfindr   r   r   r   r   r   Ú<module>   s6    











õóò

