usr/lib64/python2.7/HTMLParser.pyo000064400000032143147207526050012532 0ustar00 mfc@sdZddlZddlZejdZejdZejdZejdZejdZejdZ ejd Z ejd Z ejd Z ejd Z ejd ejZejdZejdZdefdYZdejfdYZdS(sA parser for HTML and XHTML.iNs[&<]s &[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*s[a-zA-Z][^ />]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace s#tHTMLParseErrorcBs#eZdZddZdZRS(s&Exception raised for all parse errors.cCs'||_|d|_|d|_dS(Nii(tmsgtlinenotoffset(tselfRtposition((s"/usr/lib64/python2.7/HTMLParser.pyt__init__9s  cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Ns , at line %ds , column %di(RRtNoneR(Rtresult((s"/usr/lib64/python2.7/HTMLParser.pyt__str__?s  N(NN(t__name__t __module__t__doc__RRR (((s"/usr/lib64/python2.7/HTMLParser.pyR6s t HTMLParsercBs eZdZdZdZdZdZdZdZdZ dZ d Z d Z d Zd Zd dZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ RS( sFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. tscripttstylecCs|jdS(s#Initialize and reset this instance.N(treset(R((s"/usr/lib64/python2.7/HTMLParser.pyR_scCs8d|_d|_t|_d|_tjj|dS(s1Reset this instance. Loses all unprocessed data.ts???N( trawdatatlasttagtinteresting_normalt interestingRt cdata_elemt markupbaset ParserBaseR(R((s"/usr/lib64/python2.7/HTMLParser.pyRcs     cCs!|j||_|jddS(sFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). iN(Rtgoahead(Rtdata((s"/usr/lib64/python2.7/HTMLParser.pytfeedkscCs|jddS(sHandle any buffered data.iN(R(R((s"/usr/lib64/python2.7/HTMLParser.pytclosetscCst||jdS(N(Rtgetpos(Rtmessage((s"/usr/lib64/python2.7/HTMLParser.pyterrorxscCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s"/usr/lib64/python2.7/HTMLParser.pytget_starttag_text}scCs2|j|_tjd|jtj|_dS(Ns (tlowerRtretcompiletIR(Rtelem((s"/usr/lib64/python2.7/HTMLParser.pytset_cdata_modescCst|_d|_dS(N(RRRR(R((s"/usr/lib64/python2.7/HTMLParser.pytclear_cdata_modes c Csb|j}d}t|}x||kr |jj||}|rT|j}n|jraPn|}||kr|j|||!n|j||}||krPn|j}|d|r7t j ||r|j |}n|d|r |j |}n|d|r*|j |}nm|d|rK|j|}nL|d|rl|j|}n+|d|kr|jd|d}nP|dkr"|sPn|jd|d}|dkr|jd|d}|dkr |d}q n |d7}|j|||!n|j||}q|d |rtj ||}|r|jd d !} |j| |j}|d |ds|d}n|j||}qqd ||kr|j|dd !|j|d }nPq|d |rtj ||}|r|jd} |j| |j}|d |dsn|d}n|j||}qntj ||}|r|r|j||kr|jdnPq|d|kr|jd |j||d}qPqqW|rQ||krQ|j rQ|j|||!|j||}n|||_dS(NitRtitnR4tjR2tktname((s"/usr/lib64/python2.7/HTMLParser.pyRs                   cCs|j}|||d!dkr0|jdn|||d!dkrT|j|S|||d!dkrx|j|S|||d!jd kr|jd |d}|d krd S|j||d|!|d S|j|SdS( Nis(RRBRR4RD((s"/usr/lib64/python2.7/HTMLParser.pyR8 s   cCsnd|_|j|}|dkr(|S|j}|||!|_g}tj||d}|j}|jdj|_ }x||krut j||}|sPn|jddd\} } } | sd} nX| d dko| dkns%| d dko | dknr5| dd!} n| rM|j | } n|j | j| f|j}qW|||!j } | d kr|j\} }d |jkr| |jjd } t|j|jjd }n|t|j}|j|||!|S| jd r;|j||n/|j||||jkrj|j|n|S( Niiiis'it"Rs/>s (Rs/>(RR!tcheck_for_whole_start_tagRttagfindR4R>R<R#RtattrfindtunescapetappendtstripRtcountR-trfindR0tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR((RRBtendposRtattrsR4REttagtmtattrnametrestt attrvalueR>RR((s"/usr/lib64/python2.7/HTMLParser.pyR5sP     $$  cCs|j}tj||}|r|j}|||d!}|dkrR|dS|dkr|jd|rx|dS|jd|rdS|j||d|jdn|dkrdS|d krdS||kr|S|dSntd dS( NiRt/s/>iismalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR4R>R2R1R tAssertionError(RRBRR`RDtnext((s"/usr/lib64/python2.7/HTMLParser.pyRQKs,        cCsg|j}tj||d}|s)dS|j}tj||}|s|jdk rt|j|||!|St j||d}|s|||d!dkr|dS|j |Sn|j j }|j d|j}|j||dS|j dj }|jdk rL||jkrL|j|||!|Sn|j||j|S(NiiiisR(Rt endendtagR.R>t endtagfindR4RRR0ttagfind_tolerantRIR<R#R:t handle_endtagR)(RRBRR4RJt namematchttagnameR'((s"/usr/lib64/python2.7/HTMLParser.pyR6ks6     cCs!|j|||j|dS(N(R[Rk(RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyRZscCsdS(N((RR_R^((s"/usr/lib64/python2.7/HTMLParser.pyR[scCsdS(N((RR_((s"/usr/lib64/python2.7/HTMLParser.pyRkscCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR=scCsdS(N((RRF((s"/usr/lib64/python2.7/HTMLParser.pyR@scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyR0scCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyRKscCsdS(N((Rtdecl((s"/usr/lib64/python2.7/HTMLParser.pyRHscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyROscCsdS(N((RR((s"/usr/lib64/python2.7/HTMLParser.pyt unknown_declscs2d|kr|Sfd}tjd||S(NR,cs|jd}yZ|ddkri|d}|dd krSt|dd}n t|}t|SWntk rd|dSXd dl}tjdkrid d 6}t_x0|jj D]\}}t|||s&