o
    LDi%D                     @  s"  d Z ddlmZ dZdgZddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZmZ dd	lmZmZm Z m!Z! dd
l"m#Z# eroddl$m%Z% ddlm&Z& ddl'm(Z(m)Z)m*Z* dZ+e	ee,e,f e,e,gdf Z-G dd deeZ.G dd de Z/dS )zCUse the HTMLParser library to parse HTML files that aren't too bad.    )annotationsMITHTMLParserTreeBuilder)
HTMLParser)AnyCallablecastDictIterableListOptionalTYPE_CHECKINGTupleTypeUnion)AttributeDictCDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup)BeautifulSoup)NavigableString)	_Encoding
_Encodings
_RawMarkupzhtml.parserNc                   @  s   e Zd ZU dZded< dZded< 	 edd6ddZded< ded< ded< d7ddZd8ddZ	d9d:dd Z	d9d;d"d#Z
d<d%d&Zd=d(d)Zd=d*d+Zd<d,d-Zd>d/d0Zd<d1d2Zd<d3d4Zd5S )?BeautifulSoupHTMLParserreplacestrREPLACEignoreIGNOREon_duplicate_attributesoupr   argsr   r+   &Union[str, _DuplicateAttributeHandler]kwargsc                O  s@   || _ || _|jj| _tj| g|R i | g | _|   d S N)r,   r+   builderattribute_dict_classr   __init__already_closed_empty_element_initialize_xml_detector)selfr,   r+   r-   r/    r7   c/var/www/www-root/data/www/77.83.87.30/venv/lib/python3.10/site-packages/bs4/builder/_htmlparser.pyr3   T   s   
	z BeautifulSoupHTMLParser.__init__z	List[str]r4   messagereturnNonec                 C  s   t |r0   r   )r6   r9   r7   r7   r8   erroro   s   zBeautifulSoupHTMLParser.errortagattrsList[Tuple[str, Optional[str]]]c                 C  s   | j ||dd | | dS )zHandle an incoming empty-element tag.

        html.parser only calls this method when the markup looks like
        <tag/>.
        F)handle_empty_elementN)handle_starttaghandle_endtag)r6   r=   r>   r7   r7   r8   handle_startendtag   s   z*BeautifulSoupHTMLParser.handle_startendtagTr@   boolc                 C  s   |   }|D ]3\}}|du rd}||v r5| j}|| jkrq|d| jfv r)|||< qtt|}|||| q|||< q| jjjrF| 	 \}}	nd }}	| jj
|dd|||	d}
|
durl|
jrl|rl| j|dd | j| | jdu rx| | dS dS )zHandle an opening tag, e.g. '<tag>'

        :param handle_empty_element: True if this tag is known to be
            an empty-element tag (i.e. there is not expected to be any
            closing tag).
        N )
sourceline	sourceposF)check_already_closed)r2   r+   r)   r'   r   _DuplicateAttributeHandlerr,   r1   store_line_numbersgetposrA   is_empty_elementrB   r4   append_root_tag_name_root_tag_encountered)r6   r=   r>   r@   	attr_dictkeyvalueon_duperF   rG   tagObjr7   r7   r8   rA      s2   






z'BeautifulSoupHTMLParser.handle_starttagrH   c                 C  s.   |r|| j v r| j | dS | j| dS )zHandle a closing tag, e.g. '</tag>'

        :param tag: A tag name.
        :param check_already_closed: True if this tag is expected to
           be the closing portion of an empty-element tag,
           e.g. '<tag></tag>'.
        N)r4   remover,   rB   )r6   r=   rH   r7   r7   r8   rB      s   	z%BeautifulSoupHTMLParser.handle_endtagdatac                 C  s   | j | dS )z4Handle some textual data that shows up between tags.N)r,   handle_datar6   rV   r7   r7   r8   rW      s   z#BeautifulSoupHTMLParser.handle_datanamec                 C  sh   | drt|dd}n| drt|dd}nt|}t|\}}|r-d| j_| | dS )zHandle a numeric character reference by converting it to the
        corresponding Unicode character and treating it as textual
        data.

        :param name: Character number, possibly in hexadecimal.
        x   XTN)
startswithintlstripr   numeric_character_referencer,   contains_replacement_charactersrW   )r6   rY   	real_namerV   replacement_addedr7   r7   r8   handle_charref   s   

z&BeautifulSoupHTMLParser.handle_charrefc                 C  s0   t j|}|dur|}nd| }| | dS )zHandle a named entity reference by converting it to the
        corresponding Unicode character(s) and treating it as textual
        data.

        :param name: Name of the entity reference.
        Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetrW   )r6   rY   	characterrV   r7   r7   r8   handle_entityref   s
   z(BeautifulSoupHTMLParser.handle_entityrefc                 C  s&   | j   | j | | j t dS )zOHandle an HTML comment.

        :param data: The text of the comment.
        N)r,   endDatarW   r   rX   r7   r7   r8   handle_comment  s   
z&BeautifulSoupHTMLParser.handle_commentdeclc                 C  s6   | j   |tdd }| j | | j t dS )zYHandle a DOCTYPE declaration.

        :param data: The text of the declaration.
        zDOCTYPE N)r,   ri   lenrW   r   )r6   rk   r7   r7   r8   handle_decl  s   
z#BeautifulSoupHTMLParser.handle_declc                 C  sN   |  drt}|tdd }nt}| j  | j| | j| dS )z{Handle a declaration of unknown type -- probably a CDATA block.

        :param data: The text of the declaration.
        zCDATA[N)upperr]   r   rl   r   r,   ri   rW   )r6   rV   clsr7   r7   r8   unknown_decl   s   
z$BeautifulSoupHTMLParser.unknown_declc                 C  s0   | j   | j | | | | j t dS )z\Handle a processing instruction.

        :param data: The text of the instruction.
        N)r,   ri   rW   _document_might_be_xmlr   rX   r7   r7   r8   	handle_pi/  s   

z!BeautifulSoupHTMLParser.handle_piN)r,   r   r-   r   r+   r.   r/   r   )r9   r&   r:   r;   )r=   r&   r>   r?   r:   r;   )T)r=   r&   r>   r?   r@   rD   r:   r;   )r=   r&   rH   rD   r:   r;   )rV   r&   r:   r;   )rY   r&   r:   r;   )rk   r&   r:   r;   )__name__
__module____qualname__r'   __annotations__r)   r3   r<   rC   rA   rB   rW   rd   rh   rj   rm   rp   rr   r7   r7   r7   r8   r$   =   s*   
 

>




	

r$   c                      s   e Zd ZU dZdZded< dZded< eZded< ee	e
gZd	ed
< ded< dZded< 		d&d' fddZ			d(d)dd Zefd*d$d%Z  ZS )+r   zA Beautiful soup `bs4.builder.TreeBuilder` that uses the
    :py:class:`html.parser.HTMLParser` parser, found in the Python
    standard library.

    FrD   is_xmlT	picklabler&   NAMEzIterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]]parser_argsTRACKS_LINE_NUMBERSNOptional[Iterable[Any]]parser_kwargsOptional[Dict[str, Any]]r/   r   c                   sp   t  }dD ]}||v r||}|||< qtt| jdi | |p#g }|p'i }|| d|d< ||f| _dS )a  Constructor.

        :param parser_args: Positional arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param parser_kwargs: Keyword arguments to pass into
            the BeautifulSoupHTMLParser constructor, once it's
            invoked.
        :param kwargs: Keyword arguments for the superclass constructor.
        r*   Fconvert_charrefsNr7   )dictpopsuperr   r3   updater{   )r6   r{   r~   r/   extra_parser_kwargsargrR   	__class__r7   r8   r3   K  s   

zHTMLParserTreeBuilder.__init__markupr#   user_specified_encodingOptional[_Encoding]document_declared_encodingexclude_encodingsOptional[_Encodings]r:   DIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]]c                 c  s    t |tr|dddfV  dS g }|r|| g }|r!|| t|||d|d}|jdu r3td|j|j|j|jfV  dS )a2  Run any preliminary steps necessary to make incoming markup
        acceptable to the parser.

        :param markup: Some markup -- probably a bytestring.
        :param user_specified_encoding: The user asked to try this encoding.
        :param document_declared_encoding: The markup itself claims to be
            in this encoding.
        :param exclude_encodings: The user asked _not_ to try any of
            these encodings.

        :yield: A series of 4-tuples: (markup, encoding, declared encoding,
             has undergone character replacement)

            Each 4-tuple represents a strategy for parsing the document.
            This TreeBuilder uses Unicode, Dammit to convert the markup
            into Unicode, so the ``markup`` element of the tuple will
            always be a string.
        NFT)known_definite_encodingsuser_encodingsis_htmlr   zPCould not convert input to Unicode, and html.parser will not accept bytestrings.)	
isinstancer&   rM   r   unicode_markupr   original_encodingdeclared_html_encodingra   )r6   r   r   r   r   r   r   dammitr7   r7   r8   prepare_markupi  s4   




z$HTMLParserTreeBuilder.prepare_markup_parser_classtype[BeautifulSoupHTMLParser]r;   c              
   C  s   | j \}}t|tsJ | jdusJ || jg|R i |}z|| |  W n ty: } zt|d}~ww g |_dS )z
        :param markup: The markup to feed into the parser.
        :param _parser_class: An HTMLParser subclass to use. This is only intended for use in unit tests.
        N)	r{   r   r&   r,   feedcloseAssertionErrorr   r4   )r6   r   r   r-   r/   parserer7   r7   r8   r     s   


zHTMLParserTreeBuilder.feed)NN)r{   r}   r~   r   r/   r   )NNN)
r   r#   r   r   r   r   r   r   r:   r   )r   r#   r   r   r:   r;   )rs   rt   ru   __doc__rw   rv   rx   
HTMLPARSERry   r   r   rz   r|   r3   r   r$   r   __classcell__r7   r7   r   r8   r   :  s    
 !H)0r   
__future__r   __license____all__html.parserr   typingr   r   r   r	   r
   r   r   r   r   r   r   bs4.elementr   r   r   r   r   r   
bs4.dammitr   r   bs4.builderr   r   r   r   bs4.exceptionsr   bs4r   r    bs4._typingr!   r"   r#   r   r&   rI   r$   r   r7   r7   r7   r8   <module>   s(   4  ~