mfc@sdZddlZddlZejdZejdZejdZejdZejdZejdZ ejd Z ejd Z ejd Z ejd Z ejd ejZejdZejdZdefdYZdejfdYZdS(sA parser for HTML and XHTML.iNs[&<]s &[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*s[a-zA-Z][^ />]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace s#tHTMLParseErrorcBs#eZdZddZdZRS(s&Exception raised for all parse errors.cCs3|s t||_|d|_|d|_dS(Nii(tAssertionErrortmsgtlinenotoffset(tselfRtposition((s"/usr/lib64/python2.7/HTMLParser.pyt__init__9s   cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Ns , at line %ds , column %di(RRtNoneR(Rtresult((s"/usr/lib64/python2.7/HTMLParser.pyt__str__?s  N(NN(t__name__t __module__t__doc__R RR (((s"/usr/lib64/python2.7/HTMLParser.pyR6s t HTMLParsercBs eZdZdZdZdZdZdZdZdZ dZ d Z d Z d Zd Zd dZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ RS( sFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. tscripttstylecCs|jdS(s#Initialize and reset this instance.N(treset(R((s"/usr/lib64/python2.7/HTMLParser.pyR_scCs8d|_d|_t|_d|_tjj|dS(s1Reset this instance. Loses all unprocessed data.ts???N( trawdatatlasttagtinteresting_normalt interestingR t cdata_elemt markupbaset ParserBaseR(R((s"/usr/lib64/python2.7/HTMLParser.pyRcs     cCs!|j||_|jddS(sFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). iN(Rtgoahead(Rtdata((s"/usr/lib64/python2.7/HTMLParser.pytfeedkscCs|jddS(sHandle any buffered data.iN(R(R((s"/usr/lib64/python2.7/HTMLParser.pytclosetscCst||jdS(N(Rtgetpos(Rtmessage((s"/usr/lib64/python2.7/HTMLParser.pyterrorxscCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s"/usr/lib64/python2.7/HTMLParser.pytget_starttag_text}scCs2|j|_tjd|jtj|_dS(Ns (tlowerRtretcompiletIR(Rtelem((s"/usr/lib64/python2.7/HTMLParser.pytset_cdata_modescCst|_d|_dS(N(RRR R(R((s"/usr/lib64/python2.7/HTMLParser.pytclear_cdata_modes c Cst|j}d}t|}x||kr|jj||}|rT|j}n|jraPn|}||kr|j|||!n|j||}||krPn|j}|d|r7t j ||r|j |}n|d|r |j |}n|d|r*|j |}nm|d|rK|j|}nL|d|rl|j|}n+|d|kr|jd|d}nP|dkr"|sPn|jd|d}|dkr|jd|d}|dkr |d}q n |d7}|j|||!n|j||}q|d |rtj ||}|r|jd d !} |j| |j}|d |ds|d}n|j||}qqd ||kr|j|dd !|j|d }nPq|d |rtj ||}|r|jd} |j| |j}|d |dsn|d}n|j||}qntj ||}|r|r|j||kr|jdnPq|d|kr|jd |j||d}qPqdstdqW|rc||krc|j rc|j|||!|j||}n|||_dS(Nits (Rs/>(R R"tcheck_for_whole_start_tagRttagfindR5RR?R=R$RtattrfindtunescapetappendtstripRtcountR.trfindR1tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR)(RRCtendposRtattrsR5RFttagtmtattrnametrestt attrvalueR?RR((s"/usr/lib64/python2.7/HTMLParser.pyR6sR     $$  cCs|j}tj||}|r|j}|||d!}|dkrR|dS|dkr|jd|rx|dS|jd|rdS|j||d|jdn|dkrdS|d krdS||kr|S|dSntd dS( NiRt/s/>iismalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR5R?R3R2R!R(RRCRRaREtnext((s"/usr/lib64/python2.7/HTMLParser.pyRRKs,        cCs|j}|||d!dks,tdtj||d}|sLdS|j}tj||}|s!|jdk r|j |||!|St j||d}|s|||d!dkr|dS|j |Sn|j j }|jd|j}|j||dS|j dj }|jdk ro||jkro|j |||!|Sn|j||j|S( NisR(RRt endendtagR/R?t endtagfindR5RR R1ttagfind_tolerantRJR=R$R;t handle_endtagR*(RRCRR5RKt namematchttagnameR(((s"/usr/lib64/python2.7/HTMLParser.pyR7ks8 #    cCs!|j|||j|dS(N(R\Rk(RR`R_((s"/usr/lib64/python2.7/HTMLParser.pyR[scCsdS(N((RR`R_((s"/usr/lib64/python2.7/HTMLParser.pyR\scCsdS(N((RR`((s"/usr/lib64/python2.7/HTMLParser.pyRkscCsdS(N((RRG((s"/usr/lib64/python2.7/HTMLParser.pyR>scCsdS(N((RRG((s"/usr/lib64/python2.7/HTMLParser.pyRAscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyR1scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRLscCsdS(N((Rtdecl((s"/usr/lib64/python2.7/HTMLParser.pyRIscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRPscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyt unknown_declscs2d|kr|Sfd}tjd||S(NR-cs|jd}yZ|ddkri|d}|dd krSt|dd}n t|}t|SWntk rd|dSXd dl}tjdkrid d 6}t_x0|jj D]\}}t|||RAR1RLRIRPRoRyRU(((s"/usr/lib64/python2.7/HTMLParser.pyRHs<        ^  4 (          (RRR%R&RRBR@R<R4ROt commentcloseRSRjRTtVERBOSERfRhRit ExceptionRRR(((s"/usr/lib64/python2.7/HTMLParser.pyts&