3 \AE@sddlZddlZddlZddlmZdgZejdZejdZejdZ ejdZ ejdZ ejd Z ejd Z ejd Zejd Zejd ejZejd ZejdZGdddejZdS)N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#c@seZdZd9ZddddZddZd d Zd d Zd ZddZ ddZ ddZ ddZ ddZ d:ddZddZddZdd Zd!d"Zd#d$Zd%d&Zd'd(Zd)d*Zd+d,Zd-d.Zd/d0Zd1d2Zd3d4Zd5d6Zd7d8Zd S);rscriptstyleT)convert_charrefscCs||_|jdS)N)rreset)selfrr #/usr/lib64/python3.6/html/parser.py__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tjj|dS)Nz???)rawdatalasttaginteresting_normal interesting cdata_elem _markupbase ParserBaser)r r r r r`s zHTMLParser.resetcCs|j||_|jddS)Nr)rgoahead)r datar r r feedhs zHTMLParser.feedcCs|jddS)N)r)r r r r closeqszHTMLParser.closeNcCs|jS)N)_HTMLParser__starttag_text)r r r r get_starttag_textwszHTMLParser.get_starttag_textcCs$|j|_tjd|jtj|_dS)Nz )lowerrrecompileIr)r elemr r r set_cdata_mode{s zHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)r r r r clear_cdata_modeszHTMLParser.clear_cdata_modec CsL|j}d}t|}x||kr|jr||j r||jd|}|dkr|jdt||d}|dkrvtjdj || rvP|}n(|j j ||}|r|j }n |jrP|}||kr|jr|j r|j t |||n|j ||||j||}||krP|j}|d|rLtj||r&|j|} n|d|r>|j|} nl|d|rV|j|} nT|d|rn|j|} n<|d |r|j|} n$|d |kr|j d|d } nP| dkr>|sP|jd |d } | dkr|jd|d } | dkr|d } n| d 7} |jr,|j r,|j t ||| n|j ||| |j|| }q|d |rtj||}|r|jd d} |j| |j} |d| d s| d } |j|| }qn:d||dkr|j |||d |j||d }Pq|d|rtj||}|rN|jd } |j| |j} |d| d s@| d } |j|| }qtj||}|r|r|j||dkr|j} | |kr|} |j||d }Pn,|d |kr|j d|j||d }nPqqW|r:||kr:|j r:|jr|j r|j t |||n|j ||||j||}||d|_dS)Nr<&"z[\s;]z r(r(r()rrP)rcheck_for_whole_start_tagrtagfind_tolerantr2r;r9rrattrfind_tolerantrappendstripZgetposcountr)r+r/endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr!)r r?endposrattrsr2rBtagmZattrnamerestZ attrvaluer;linenooffsetr r r r3-sP   (*         zHTMLParser.parse_starttagcCs|j}tj||}|r|j}|||d}|dkr>|dS|dkr~|jd|rZ|dS|jd|rjd S||krv|S|dS|dkrd S|dkrd S||kr|S|dStddS) Nrr/z/>r&r z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!r(r(r()rlocatestarttagend_tolerantr2r;r0AssertionError)r r?rr_rAnextr r r rR`s.   z$HTMLParser.check_for_whole_start_tagcCs|j}tj||d}|sdS|j}tj||}|s|jdk rV|j||||Stj||d}|s|||ddkr|dS|j |S|j dj }|j d|j}|j ||dS|j dj }|jdk r||jkr|j||||S|j |j |j|S)Nrr&rEzrr()r endendtagr-r; endtagfindr2rr/rSrHr9rr* handle_endtagr")r r?rr2rIZ namematchZtagnamer r r r r4s6      zHTMLParser.parse_endtagcCs|j|||j|dS)N)rZri)r r^r]r r r rYs zHTMLParser.handle_startendtagcCsdS)Nr )r r^r]r r r rZszHTMLParser.handle_starttagcCsdS)Nr )r r^r r r riszHTMLParser.handle_endtagcCsdS)Nr )r rCr r r r:szHTMLParser.handle_charrefcCsdS)Nr )r rCr r r r=szHTMLParser.handle_entityrefcCsdS)Nr )r rr r r r/szHTMLParser.handle_datacCsdS)Nr )r rr r r rJszHTMLParser.handle_commentcCsdS)Nr )r Zdeclr r r rGszHTMLParser.handle_declcCsdS)Nr )r rr r r rMszHTMLParser.handle_picCsdS)Nr )r rr r r unknown_declszHTMLParser.unknown_declcCstjdtddt|S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r&) stacklevel)warningswarnDeprecationWarningr)r sr r r rs zHTMLParser.unescape)rr)r)__name__ __module__ __qualname__r[r rrrrrr!r"rr7rHr6r3rRr4rYrZrir:r=r/rJrGrMrjrr r r r r?s6  z  3"()rrlrZhtmlr__all__rrr>r<r8r1rLZ commentcloserSrTVERBOSErdrgrhrrr r r r  s&