�`^c@sdZddlZddlZejd�Zejd�Zejd�Zejd�Zejd�Zejd�Z ejd �Z
ejd
�Zejd�Zejd�Z
ejd
ej�Zejd�Zejd�Zdefd��YZdejfd��YZdS(sA parser for HTML and XHTML.i�Ns[&<]s
&[a-zA-Z#]s%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]s)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]s <[a-zA-Z]t>s--\s*>s$([a-zA-Z][^
/>]*)(?:\s|/(?!>))*s[a-zA-Z][^
/>]*s]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*s
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
s#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>tHTMLParseErrorcBs#eZdZdd�Zd�ZRS(s&Exception raised for all parse errors.cCs3|st�||_|d|_|d|_dS(Nii(tAssertionErrortmsgtlinenotoffset(tselfRtposition((s /sys/lib/python2.7/HTMLParser.pyt__init__<s
cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Ns, at line %ds, column %di(RRtNoneR(Rtresult((s /sys/lib/python2.7/HTMLParser.pyt__str__Bs N(NN(t__name__t
__module__t__doc__R RR(((s /sys/lib/python2.7/HTMLParser.pyR9st
HTMLParsercBs
eZdZdZd�Zd�Zd�Zd�Zd�ZdZ
d�Zd �Zd
�Z
d�Zd�Zd
d�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�Zd�ZdZd�Z RS( s�Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). Entity references are
passed by calling self.handle_entityref() with the entity
reference as the argument. Numeric character references are
passed to self.handle_charref() with the string containing the
reference as the argument.
tscripttstylecCs|j�dS(s#Initialize and reset this instance.N(treset(R((s /sys/lib/python2.7/HTMLParser.pyRbscCs8d|_d|_t|_d|_tjj|�dS(s1Reset this instance. Loses all unprocessed data.ts???N( trawdatatlasttagtinteresting_normaltinterestingR t
cdata_elemt
markupbaset
ParserBaseR(R((s /sys/lib/python2.7/HTMLParser.pyRfs
cCs!|j||_|jd�dS(s�Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
iN(Rtgoahead(Rtdata((s /sys/lib/python2.7/HTMLParser.pytfeednscCs|jd�dS(sHandle any buffered data.iN(R(R((s /sys/lib/python2.7/HTMLParser.pytclosewscCst||j���dS(N(Rtgetpos(Rtmessage((s /sys/lib/python2.7/HTMLParser.pyterror{scCs|jS(s)Return full source of start tag: '<...>'.(t_HTMLParser__starttag_text(R((s /sys/lib/python2.7/HTMLParser.pytget_starttag_text�scCs2|j�|_tjd|jtj�|_dS(Ns</\s*%s\s*>(tlowerRtretcompiletIR(Rtelem((s /sys/lib/python2.7/HTMLParser.pytset_cdata_mode�scCst|_d|_dS(N(RRR R(R((s /sys/lib/python2.7/HTMLParser.pytclear_cdata_mode�s c
Cs||j}d}t|�}x||kr%|jj||�}|rT|j�}n|jraPn|}||kr�|j|||!�n|j||�}||kr�Pn|j}|d|�r7t j
||�r�j|�}n�|d|�r |j|�}n�|d|�r*|j
|�}nm|d|�rK|j|�}nL|d|�rl|j|�}n+|d|kr�|jd�|d}nP|dkr"|s�Pn|jd|d�}|dkr|jd|d�}|dkr|d}qn
|d7}|j|||!�n|j||�}q|d |�rtj
||�}|r�|j�d
d!} |j| �|j�}|d|d�s�|d}n|j||�}qq"d||kr|j|||d
!�|j||d
�}nPq|d
|�rtj
||�}|r�|jd�} |j| �|j�}|d|d�sv|d}n|j||�}qntj
||�}|r�|r�|j�||kr�|jd�nPq"|d|kr|jd
�|j||d�}q"Pqdstd��qW|rk||krk|jrk|j|||!�|j||�}n|||_dS(Nit<s</s<!--s<?s<!iRs&#ii�t;t&s#EOF in middle of entity or char refsinteresting.search() lied(RtlenRtsearchtstartRthandle_datat updatepost
startswithtstarttagopentmatchtparse_starttagtparse_endtagt
parse_commenttparse_pitparse_html_declarationtfindtcharreftgroupthandle_charreftendt entityrefthandle_entityreft
incompleteR!R(
RR?RtitnR5tjR3tktname((s /sys/lib/python2.7/HTMLParser.pyR�s�
cCs�|j}|||d!dkr0|jd�n|||d!dkrT|j|�S|||d!dkrx|j|�S|||d!j�d kr�|jd
|d�}|dkr�dS|j||d|!�|dS|j|�SdS(
Nis<!s+unexpected call to parse_html_declaration()is<!--is<![i s <!doctypeRi�i(RR!R8tparse_marked_sectionR$R;thandle_decltparse_bogus_comment(RRCRtgtpos((s /sys/lib/python2.7/HTMLParser.pyR:�s
icCs|j}|||d!dkr0|jd�n|jd|d�}|dkrVdS|rw|j||d|!�n|dS( Nis<!s</s"unexpected call to parse_comment()Ri�i(s<!s</(RR!R;thandle_comment(RRCtreportRtpos((s /sys/lib/python2.7/HTMLParser.pyRJs cCs�|j}|||d!dks,td��tj||d�}|sLdS|j�}|j||d|!�|j�}|S(Nis<?sunexpected call to parse_pi()i�(RRtpicloseR/R0t handle_piR?(RRCRR5RE((s /sys/lib/python2.7/HTMLParser.pyR9s #cCs�d|_|j|�}|dkr(|S|j}|||!|_g}tj||d�}|sotd��|j�}|jd�j �|_
}x�|kr�tj||�}|s�Pn|jddd�\} }
}|
s�}nX|d dko|dkns7|d dko2|dknrG|dd!}n|r_|j|�}n|j
| j �|f�|j�}q�W|||!j�}|dkr+|j�\}
}d|jkr|
|jjd�}
t|j�|jjd�}n|t|j�}|j|||!�|S|jd
�rM|j||�n/|j||�||jkr||j|�n|S(
Niis#unexpected call to parse_starttag()iis'i�t"Rs/>s
(Rs/>(R R"tcheck_for_whole_start_tagRttagfindR5RR?R=R$RtattrfindtunescapetappendtstripRtcountR.trfindR1tendswiththandle_startendtagthandle_starttagtCDATA_CONTENT_ELEMENTSR)(RRCtendposRtattrsR5RFttagtmtattrnametrestt attrvalueR?RR((s /sys/lib/python2.7/HTMLParser.pyR6sR $$cCs�|j}tj||�}|r�j�}|||d!}|dkrR|dS|dkr�|jd|�rx|dS|jd|�r�dS|j||d�|jd�n|dkr�dS|d kr�dS||kr�S|dSntd
��dS(NiRt/s/>ii�smalformed empty start tagRs6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZswe should not get here!(RtlocatestarttagendR5R?R3R2R!R(RRCRRaREtnext((s /sys/lib/python2.7/HTMLParser.pyRRNs, cCs�|j}|||d!dks,td��tj||d�}|sLdS|j�}tj||�}|s$|jdk r�|j |||!�|St
j||d�}|s�||d!dkr�|dS|j|�Sn|jd�j
�}|jd|j��}|j|�|dS|jd�j
�}|jdk rr||jkrr|j |||!�|Sn|j|�|j�|S( Nis</sunexpected call to parse_endtagii�is</>R(RRt endendtagR/R?t
endtagfindR5RR R1RSRJR=R$R;t
handle_endtagR*(RRCRR5RKt namematchttagnameR(((s /sys/lib/python2.7/HTMLParser.pyR7ns8 #
cCs!|j||�|j|�dS(N(R\Rj(RR`R_((s /sys/lib/python2.7/HTMLParser.pyR[�scCsdS(N((RR`R_((s /sys/lib/python2.7/HTMLParser.pyR\�scCsdS(N((RR`((s /sys/lib/python2.7/HTMLParser.pyRj�scCsdS(N((RRG((s /sys/lib/python2.7/HTMLParser.pyR>�scCsdS(N((RRG((s /sys/lib/python2.7/HTMLParser.pyRA�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyR1�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyRL�scCsdS(N((Rtdecl((s /sys/lib/python2.7/HTMLParser.pyRI�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pyRP�scCsdS(N((RR((s /sys/lib/python2.7/HTMLParser.pytunknown_decl�scs2d|kr|S�fd�}tjd||�S(NR-cs|j�d}yZ|ddkri|d}|dd
krSt|dd�}nt|�}t|�SWntk
r�d|dSXd dl}tjdkr�d
d6}t_x0|jj �D]\}}t|�||<q�Wny�j|SWnt
k
rd|dSXdS(Nit#itxtXis&#R,i�u'taposR-(RpRq(tgroupstinttunichrt
ValueErrorthtmlentitydefsRt
entitydefsR tname2codepointt iteritemstKeyError(tstcRwRxRFtv(R(s /sys/lib/python2.7/HTMLParser.pytreplaceEntities�s&
s#&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));(R%tsub(RR|R((Rs /sys/lib/python2.7/HTMLParser.pyRU�s(RRN(!RR
RR]RRRRR!R R"R#R)R*RR:RJR9R6RRR7R[R\RjR>RAR1RLRIRPRnRxRU(((s /sys/lib/python2.7/HTMLParser.pyRKs< ^ 4 ( (RRR%R&RRBR@R<R4ROtcommentcloseRSttagfind_tolerantRTtVERBOSERfRhRit ExceptionRRR(((s /sys/lib/python2.7/HTMLParser.pyt<module>s&
|