JFIF   ( %!1!%)+...383,7(-.+  ++-+++++-++-++--+--+-+-------+-++-+--+---+++--+7+-+"F!1AQaq"2BRb#3Sr$CDsT&!Q1Aa"2Rbq ?򉄘ǷLR HR,nNb .&W)fJbMOYxj-\bT2(4CQ"qiC/ " %0Jl"e2V  0SDd2@TV^{cW&F͉x9#l,.XɳvRZ C8S 6ml!@!E! `FS!M #(d)Q lml1ml Ų&x(ʨ2NFmj@D<dN5UN˄uTB emLAy#` ` ` I!I 6āHBxL & J#7BQ.$hv h q+tC"EJ) 8R e2U2Y@j%6PF^4LnNBp"8)4JI-ֲvK ^؊)hz[T5˗",Rҥf8ڤS4ʘ!`D ` X+ L,(hl)*S##`6[`0*L T H*HA@I&&r1kr*r*)N$#L  1#ZFSl `[( ("((he`4 Ch [="A R / 0I`twCDcWh"i) cLad\BcLKHZ"ZEW$Ƚ@A~i^`S *A&h:+c Y6vϕGClRPs.`H`(@<$qDe pL@DpLX, E2MP A  `II m& AQ "AT rbg# g2!SiLj*3L \ G;TFL`K BMy 2S`YLh1 d >-"ZfD^Q DH" RAbEV#Lfq,(rETp64-IJ!*p4F$q;G8DQ/TKP2$jp3KW]FtLtƉ1ol]VBgػJH6 )h61GJR7Nj.Z4piJRDd]t]0dP]:N.b'⹙SvDSz]L,_#ugT&[~?cS^"{Bh{/=ۑxOk̳O59o dar793`)SeYM@\ "$E(Tm&)N2Ih)F5EDed(FS,Pa @!@#@lea HCD$11jCLJqcod S3yd*,lL+QEfsgW1nw)cT#dS HXkFJB"6(ʝH)H"#EZh:Y`khݳh%Sc<mlAko2]gDqQtro=3OƸU9_-t8UvW3sGəg*#:c)><"wc\ASmT|6Ę>9~#1Ƈ~ڒE1vVi# I MM#u$8W 5ǍfƬΜg*Qpi1ȩFOf۔S,/⎯(Lrմ`(Z LsbA \6 6dm[I=!r:REI.wgzG)ԇSbӑxuׇTyyL^e'x^ty4Z&eB]I|v59Jjhm;Ng񷫳n<ϞҼѝjk;׹DlY^ҍ\+x9V!j([cmS.NO6jxNζrm&oײizT$N>?~ Sl-:iڥk\at#E!CL`.O0a*w/WV7/r)DŽt7'Nĵ#7O1 ]{[/-2bA<$&Gm_4t)_>)mjG;V^'k59o>ɌM,ؾf9z6 4v_3T.5V/RD-5 %T5XTޫ4TaZ`U *ƱUƲ UG"5+sJJ2E9#܎kr2G3Bb,XM6H: ?@p!'\4V02aԙ) hbZ]:` ev3ʘ'}!ohȒ*TJjr[RFyQ*#{h{R]J]Lr-.D-.җfo$D ?X0%~1P.Og{cWϫ22&Ϭ_V.W3nmiOl}+!˫#`kR33aUb0-g:qmsέ+0HO|&nhOn+}n5QF_"gvLm/z'+r'n_oC语i|1}Gi|}_D~9JZ_%DVQp\koۅjAs~/c0ksUJi^W9W5!>?O:q|ˣSIB/&K<(lg(%Wg$|LW7vߤW߇q|jef3D H\S6(eJb*@&sTKTW/*@v:.N- @ITʓ1Zg&-eꓝM r]EMס{q$b]'7Z7N:O~lNlP7iͲk)$O^퉢<YSD*hr'Z#5e6t[Fdh AJǔP9P 1\R).Il+jI*,(ܢ22N*OwKFX gc?\mB7iA+εe8 "ġ/p5pW-$މ-[a 5ViAW/V{/&UsF./՞ҕ*)rZg.^_+gt_z-oAbqQn*WlHyZ*\TaEewlLR3ԹȭN}MM}aih"5ܕRT$:~'TcT|*)xGC>n+r{XU xuF"<~67у'fxlf`r3D*#Z1ђfH`2dIWo/qB| 63xxW6^m%Kvg>\>x>!H5Nr8J/FJ9Wx(Hou" S'kWاC\9ְ#^OaҮ+~gnkuЉ,aWU*1 읍jnb|e= :2.UL`Q}YS&gI.c=a`%j:C%2@^>])25/ܙ<lzwɛ)ݣS4h3=J tyϬ.E7 8ڞGZu\_JHsݢϑ}IZ"ӳ=X<Ɖ2{a:{7L+>V}c)*lo Yv&+|L;>+/Sj26K+澡*;>-s"}M2] Ig5aCL*r"&\} #^R.7_Mgf}.ߌy(}Z\gP&ʠHj%</{.]rߙQ`>;5g;u6dԛ %xb|oՋTJ5Ϥ(]XqP>f{Jk2,8'~ZU6tMQsg XKg^2ϓ3},[wo۴I|ܷ%[Ol\Pkr]Y//cg6U⧻/VПi8ys_n<\~cze!!H~x;QJZKȮ^ȧG|cS~8ji,Fo+,y~?pk)u /in3JmkX(Mj1N 4c Epc>BO *LfQO&` c;LjcYf 1ɻ)CLsY^Y5" lP/wuEln&dav,(;'W9ej ku`-KHI՟%ԁʁ 1\}?OjsF^Xn$Ё.օC>D:?I @aGE.ĩ1 $ et~T`߸Ir'RX.Zwc%~U=r>-UaFbǺ?R=Z?i'[ASS;siJrzy>nxu$[_B\4}:r'ҵj1_v-[;y?ֹ0I16 . M%4^!S&t ! h !zQð.bBT ?@]?CHq(rd!.$>/x+bnʎNN#w)` )*f!-ɂ\(طYLHzc`Uq7BfCcE0ԉ4Fم쏠ce5T r͸GVlФ?ѣ} mhrkly.Ts㷖)Mө S^%'g>wk%bP[}j~ǾV#K -Fgv켼ǨgɼeSz/6{M=BPZFu\Q75n3Iݤ.W9QfF{vJwF't[@iVj4G~KOnH߿_Do=.c.One?E+GfGN⧭H?4;u`ua|V-+j4?48n ɦ=-]puv&Jc}K>b%U x8pz6L8AXFsW]N55ҦbIWZQ7ï Ԗ3cjz匩ӺOTɖƴ%a'MI}cdR$ݚIζ̝ LIu>J3{^෠㜦˯xܿe\b"2y'x{ RDW b+o2KFhR0:U늞En>լRӉt Iڹ\ wշQEv"v;EJ)yl[5:F0=b4,\PqKtv4{bQz:>C7"8W#Zjdd| cjz%K %Z 9dD{=NFʳAƩtI)kS*s$`:A\ʬ*ֹ9{Nl|eJ١rQnM%z_#x_•TO><)kyD %GN<~y>vfǧB)F)c\lې(#\ h`fgfjTBdhhHL2Y0^ Y0^-"D!QaI15 m~ gՒd|;#gMn(P$l H.R2^PU")pN` N8󫅂OJ;^jz\uumJMF|ηq[]$Vrrt:Q^;QPkHՠ{]HwˆMuIr7!r&- j%"9LtUb56+^TWBqdhHAD7 HwKH^F3LIq #hK`]IWKiH?کǴeԥQ>g{^q^>HKoOB||8aݏS}{S_]ϸ/X~ܵw'OSPAf֩ܟ[>7 @[ֵ;G߇QU*Cթ *OKU^zz[fRnpcJX9u<iq8B]u8 ]I,;[G#2W.¸D8rPG Y%PBJ= wo;PJgx6;yB`3zZGPAͫy{5Nb_re*ONHR]Ji)U{Ӓ:qqɏ[mB4࢒I$ 2vpBADY`DIVAn"Bh$&&cMbdB 鮆wHR'E(ѸZA*H~{B M҅n\@N{7ISCp Vd( r+bg|ns:qg:|J|ɪV.UVaAS͓FyRuLѦT騬 `3􏳕{eo/Tz8DkW?,cl~TqLne֠[B*D +t 6˦S;5KjV3e WBrT.XSHm sl5F%NGM`Y )": J!W4]HTrPX2 QYɕ\m2VLd+`,^ѺiPztUGY6+cӧ6] U%u/ˈFOiB*nFF#ұJ Z/c')?Q͟5.8E~G6e<\?}GkhMFUظOqhEA - "`dQ#(4Ԧf VLmc@q5J8K; M^JZnn)9Zm\ qIJqS: i[9~Oaƒ]Z4F&+666( N]쁼LM(oyvUI/Χ[ھ]hTˉG".SeYgu;hRDtڬv=5 ׁqMS\Ȭi5D]1$*0UL1QY`QdLb[+z9";'yi`OT/4{@EZ'Y0>4I*d nM#5hі.vrM[]Ä;]\ʦS,叕DQZq0fӌI͋]TNK"#;?F;aURx_4WDm+F*0XJE@){ 1R-E2(@Qh l D rT.Q;[J;[`30`ɀ 2#=JeSsxRjG=`H rLJ@ Y$JaB2/x( "Id'6O0CI$:Ol+}I>[L|iK+]ZrH*2Aʶ uHRd)OrrbSx=5dmue1neܬ"e>Lw94勲u ҏ_4GuоJw]QtgSk(qW(6h|v= 1=P/\YZ|R>"*5W/ίR'o %R$5= .!VIRMf4*aR5nv% Usj:V Lj]Bn/TZ&.2„ܒBP)aYRʌW!#ErGf';tW$czI*\KI,c7Zc-ўj|p+-ђ{eg 2;R_{VLM]7sؒFmԻy853gҾqJG!E̤ӏqzs༿? U#R)ŧU(,>,&,-^e^۔.b EW^n<)\9.QeJuFiSh2"EL8yeCKQD\5R,D5.P]c1STt*ZFJ.T:N #%]M}khOe(͓iEMsɆ3( YF<"Ly^*[ry6.ɸm k݊iT%nM8 $Q#F# q 1*?% iS^4oܗ wWPS,aNޖxOxڽqp#F6&o,7LJuMΤK(Td{U Ƹf|q5U{3[FLNK6ӵQY5+'>Q3FSk).&:5z yZq/*q$d+Ge+$lO@Nڤy5eBvˌ䖥shS:JksgksF ꧸oi-FYxy9[Vȼĝ'_.[y2U*c?E+:TsWՀgOS> z75>ncߏ-Kz8ԋ,Ϧ70Z9_1h$Xiu10)0$+$! qsE4wRkh2*T.s%DH:`:=k.'WB{ ȮRGҷ7чVg)CHS}1ݍԳۂ<8g_4y*-Ml\]mZT)mJ~|k<6zWjf4'*u%RNRȉZA) .VLtp 4 V&mtJ#l˅;&{]8>TmhoLXOeD^_J>]jsSej﫦iOM SK([!Vc5zn-A@p]Ӄ \3kmK>#-sܧ?NLar@Js?…Xldny]݌E5•9.8hh69#7js׳R,'pqt:kgPhRԄ+ՕG9}="ֲ\kǁm R73pg$t3+o |o\]'ee5ɐ.7ѐ|ZعSF{qkx5-$Q h5*1yM$ 7)hJ2Kg`-hn*>)EYDIkBpȩAzfǪ>7O K#lߤg]:u~huُ۵u}(mjGIj܏6ES~/5CiRy|kVKGBޭ3;w /jꏈUu>iƪi:WRo'yr4C/?c:w!?\'?#Q:>u/?uEeuG*xY2)?־CAr*23_ץ}գk1%(_ _6aԗ _4 $ϗ+ϫɆzǾIgu?Y<#_xS>i\uɇ۽r}[ͫyRoWCC!H,iD։"Cj5 4] cTk2YZRBvRY~FqQt^RO-g"QP]Ih/t:ljs YӹqI] wqXp KV+8j} uu8PGP&zF:;8+ Sx9(. Q}:ƻWr,Ũ*'shfƧ-6__5,DH{* qp묘G MA}QRe{dyMucǨɾ7߈Avϩe͜jmUi p3\5,ާbf:o+7#ܾ~iU#up=}˄k{NV8m!ҌiptޜBvKi}!ש3UK)`igӞVMR'J[ky~g&6vǍ7ķ>uXd(3瓓[]QTTqnͮz1~_͓k俸0~Z1գ =18cL 5^lf^k^<ҲJɬcC-[^;J8j_q=WpeA_6 4.Ntc>Sv2Jf;G8. 5[,;ArSTˬmpmzjGe EoǩOgDWaGhz<|kT\$Q=u/ci˜S mN&Ok~'0,a} s + NC-G'(*>vw~&*wYG Ŷ K-L/$߮l/A/^:Z@X- Q-D2`@M2+w$Q"胊"47&+Dh'9Y* L7VhT+ -?K]Ik \Ϣgy) s v z)Z ˦2&ލ OjmG9@8F_u䊜r>3K%Yg-FFI]e+Kxkzװy"\Q4Ri'0+P=V&Sw3N/U|UEt*uS c M*tsBE 2ʃ@Kir(˫LRr璜Zy@].%NbXvz덟 hӰNMe#|g͒po9^licxB[e' {U? mlt%?霋ǒxZc X]ϗ15SeE{-Ӕi~DƯO|ë5a@G=%<ƧAs*+tzo, IpȔ|:X6J3Z5JXd]2 3%v*GvE@(S&SX7D0^{5t Z{ﮄsh- ]ɑqEV=^Ki9äBtI@&pEg*O<`F-}ǎ51H,<~qibQѓɳx#l$G9td1U+Sq%B[jOq+^ޏ7K >YY  $KK{*˝e"|$g"6v,,9.DaA,qэI~ܨ|kdv; hz2]x5{M5M~yלqTzUl9Mӏ.WVnkun !jzKO!v|& ;gۇ2BrI閵C tqHe[Zkގ=Q;OԶiᵞBcIU eN cOGz S__>.hNgG6).J$_Taѯ5^LqeB]O?A]H;ò{^0ٺuޚxB|:q'xu4"9Ο7k^eZ_fQOmzm̗{c3ٵKO|m*ek(8"yO(ٵ{LJb2Ǩkgg1_/qrDՆ[_l\ I~Bsc/x ),,̿@PFޞ>O)<<=5m=^x6}~6qoYGޣiY{uN+<,CǚwVxe~c!,5R4u/9In=G•^PF6ɼM򿶤$"\|78ؖYU cXFOKc4s-=6O<;.ϴ޶$q>e? qY}StirX?e/&R'ʑ[ѯMi{?8\g^>\!-VZCf.ȾzRWMh_{^H)mz}V%չM.EJUz7z>ZW6\BW~:W3!S_4~m ǚ! ;VeGKFڵ858Buj:ZZ(/H׭eav!$gpLV)țAJO~YBꤞ厅XJdjg{hR9~_f '5U+}W5%ZjzgTtozYD @%JK\qymeЪKIIp"xoz\B1$G)8Ԅ Jeyc".yyVBR-%BEA-k^Luj cYwԄ%X!e-4ZRḡlJvYsB԰˗0?RM\TlaߏVu4BmY!UyYylgd!m2$i=[hN,6)_~7͖CDF2zÕ{?l;Hܲk׋!/XAłrCXEI{]P[e! ?%Ktqܱ5! jַĞ*TvAG)fuxTҖV7~ 4=r! ob%jTwU$Bnqed䤿@0P&V]HJ)^YrޯĿbsY8=1! n}UD*7uƫi~!s[W{V9J;~Ӯ|[3s۷dڔIj?qJ'O,IkE]G(5\ۖ7)-g,ŶǗ=~e>k쐁%(g˦o[fxN_baGBm:܆VGЗ,G_D!/og,ҢVܤ_iS_~@ SkidSec Webshell

SkidSec WebShell

Server Address : 172.31.38.4

Web Server : Apache/2.4.58 (Ubuntu)

Uname : Linux ip-172-31-38-4 6.14.0-1017-aws #17~24.04.1-Ubuntu SMP Wed Nov 5 10:48:17 UTC 2025 x86_64

PHP Version : 7.4.33



Current Path : /snap/core22/2193/lib/python3.10/html/



Current File : //snap/core22/2193/lib/python3.10/html/parser.py
"""A parser for HTML and XHTML."""

# This file is based on sgmllib.py, but the API is slightly different.

# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).


import re
import _markupbase

from html import unescape


__all__ = ['HTMLParser']

# Regular expressions used for parsing

interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')

entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

starttagopen = re.compile('<[a-zA-Z]')
endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
#  1) if you change tagfind/attrfind remember to update locatestarttagend too;
#  2) if you change tagfind/attrfind and/or locatestarttagend the parser will
#     explode, so don't do it.
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
attrfind_tolerant = re.compile(
    r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend_tolerant = re.compile(r"""
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')



class HTMLParser(_markupbase.ParserBase):
    """Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    """

    CDATA_CONTENT_ELEMENTS = ("script", "style")

    def __init__(self, *, convert_charrefs=True):
        """Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        """
        self.convert_charrefs = convert_charrefs
        self.reset()

    def reset(self):
        """Reset this instance.  Loses all unprocessed data."""
        self.rawdata = ''
        self.lasttag = '???'
        self.interesting = interesting_normal
        self.cdata_elem = None
        _markupbase.ParserBase.reset(self)

    def feed(self, data):
        r"""Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        """
        self.rawdata = self.rawdata + data
        self.goahead(0)

    def close(self):
        """Handle any buffered data."""
        self.goahead(1)

    __starttag_text = None

    def get_starttag_text(self):
        """Return full source of start tag: '<...>'."""
        return self.__starttag_text

    def set_cdata_mode(self, elem):
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)

    def clear_cdata_mode(self):
        self.interesting = interesting_normal
        self.cdata_elem = None

    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
    def goahead(self, end):
        rawdata = self.rawdata
        i = 0
        n = len(rawdata)
        while i < n:
            if self.convert_charrefs and not self.cdata_elem:
                j = rawdata.find('<', i)
                if j < 0:
                    # if we can't find the next <, either we are at the end
                    # or there's more text incoming.  If the latter is True,
                    # we can't pass the text to handle_data in case we have
                    # a charref cut in half at end.  Try to determine if
                    # this is the case before proceeding by looking for an
                    # & near the end and see if it's followed by a space or ;.
                    amppos = rawdata.rfind('&', max(i, n-34))
                    if (amppos >= 0 and
                        not re.compile(r'[\s;]').search(rawdata, amppos)):
                        break  # wait till we get all the text
                    j = n
            else:
                match = self.interesting.search(rawdata, i)  # < or &
                if match:
                    j = match.start()
                else:
                    if self.cdata_elem:
                        break
                    j = n
            if i < j:
                if self.convert_charrefs and not self.cdata_elem:
                    self.handle_data(unescape(rawdata[i:j]))
                else:
                    self.handle_data(rawdata[i:j])
            i = self.updatepos(i, j)
            if i == n: break
            startswith = rawdata.startswith
            if startswith('<', i):
                if starttagopen.match(rawdata, i): # < + letter
                    k = self.parse_starttag(i)
                elif startswith("</", i):
                    k = self.parse_endtag(i)
                elif startswith("<!--", i):
                    k = self.parse_comment(i)
                elif startswith("<?", i):
                    k = self.parse_pi(i)
                elif startswith("<!", i):
                    k = self.parse_html_declaration(i)
                elif (i + 1) < n or end:
                    self.handle_data("<")
                    k = i + 1
                else:
                    break
                if k < 0:
                    if not end:
                        break
                    if starttagopen.match(rawdata, i):  # < + letter
                        pass
                    elif startswith("</", i):
                        if i + 2 == n:
                            self.handle_data("</")
                        elif endtagopen.match(rawdata, i):  # </ + letter
                            pass
                        else:
                            # bogus comment
                            self.handle_comment(rawdata[i+2:])
                    elif startswith("<!--", i):
                        j = n
                        for suffix in ("--!", "--", "-"):
                            if rawdata.endswith(suffix, i+4):
                                j -= len(suffix)
                                break
                        self.handle_comment(rawdata[i+4:j])
                    elif startswith("<![CDATA[", i):
                        self.unknown_decl(rawdata[i+3:])
                    elif rawdata[i:i+9].lower() == '<!doctype':
                        self.handle_decl(rawdata[i+2:])
                    elif startswith("<!", i):
                        # bogus comment
                        self.handle_comment(rawdata[i+2:])
                    elif startswith("<?", i):
                        self.handle_pi(rawdata[i+2:])
                    else:
                        raise AssertionError("we should not get here!")
                    k = n
                i = self.updatepos(i, k)
            elif startswith("&#", i):
                match = charref.match(rawdata, i)
                if match:
                    name = match.group()[2:-1]
                    self.handle_charref(name)
                    k = match.end()
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                else:
                    if ";" in rawdata[i:]:  # bail by consuming &#
                        self.handle_data(rawdata[i:i+2])
                        i = self.updatepos(i, i+2)
                    break
            elif startswith('&', i):
                match = entityref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_entityref(name)
                    k = match.end()
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                match = incomplete.match(rawdata, i)
                if match:
                    # match.group() will contain at least 2 chars
                    if end and match.group() == rawdata[i:]:
                        k = match.end()
                        if k <= i:
                            k = n
                        i = self.updatepos(i, i + 1)
                    # incomplete
                    break
                elif (i + 1) < n:
                    # not the end of the buffer, and can't be confused
                    # with some other construct
                    self.handle_data("&")
                    i = self.updatepos(i, i + 1)
                else:
                    break
            else:
                assert 0, "interesting.search() lied"
        # end while
        if end and i < n and not self.cdata_elem:
            if self.convert_charrefs and not self.cdata_elem:
                self.handle_data(unescape(rawdata[i:n]))
            else:
                self.handle_data(rawdata[i:n])
            i = self.updatepos(i, n)
        self.rawdata = rawdata[i:]

    # Internal -- parse html declarations, return length or -1 if not terminated
    # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
    # See also parse_declaration in _markupbase
    def parse_html_declaration(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<!', ('unexpected call to '
                                        'parse_html_declaration()')
        if rawdata[i:i+4] == '<!--':
            # this case is actually already handled in goahead()
            return self.parse_comment(i)
        elif rawdata[i:i+3] == '<![':
            return self.parse_marked_section(i)
        elif rawdata[i:i+9].lower() == '<!doctype':
            # find the closing >
            gtpos = rawdata.find('>', i+9)
            if gtpos == -1:
                return -1
            self.handle_decl(rawdata[i+2:gtpos])
            return gtpos+1
        else:
            return self.parse_bogus_comment(i)

    # Internal -- parse bogus comment, return length or -1 if not terminated
    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
    def parse_bogus_comment(self, i, report=1):
        rawdata = self.rawdata
        assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
                                                'parse_comment()')
        pos = rawdata.find('>', i+2)
        if pos == -1:
            return -1
        if report:
            self.handle_comment(rawdata[i+2:pos])
        return pos + 1

    # Internal -- parse processing instr, return end or -1 if not terminated
    def parse_pi(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
        match = piclose.search(rawdata, i+2) # >
        if not match:
            return -1
        j = match.start()
        self.handle_pi(rawdata[i+2: j])
        j = match.end()
        return j

    # Internal -- handle starttag, return end or -1 if not terminated
    def parse_starttag(self, i):
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind_tolerant.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = match.group(1).lower()
        while k < endpos:
            m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos

    # Internal -- check to see if we have a complete starttag; return end
    # or -1 if incomplete.
    def check_for_whole_start_tag(self, i):
        rawdata = self.rawdata
        m = locatestarttagend_tolerant.match(rawdata, i)
        if m:
            j = m.end()
            next = rawdata[j:j+1]
            if next == ">":
                return j + 1
            if next == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                if j > i:
                    return j
                else:
                    return i + 1
            if next == "":
                # end of input
                return -1
            if next in ("abcdefghijklmnopqrstuvwxyz=/"
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            if j > i:
                return j
            else:
                return i + 1
        raise AssertionError("we should not get here!")

    # Internal -- parse endtag, return end or -1 if incomplete
    def parse_endtag(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
        match = endendtag.search(rawdata, i+1) # >
        if not match:
            return -1
        gtpos = match.end()
        match = endtagfind.match(rawdata, i) # </ + tag + >
        if not match:
            if self.cdata_elem is not None:
                self.handle_data(rawdata[i:gtpos])
                return gtpos
            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
            namematch = tagfind_tolerant.match(rawdata, i+2)
            if not namematch:
                # w3.org/TR/html5/tokenization.html#end-tag-open-state
                if rawdata[i:i+3] == '</>':
                    return i+3
                else:
                    return self.parse_bogus_comment(i)
            tagname = namematch.group(1).lower()
            # consume and ignore other stuff between the name and the >
            # Note: this is not 100% correct, since we might have things like
            # </tag attr=">">, but looking for > after the name should cover
            # most of the cases and is much simpler
            gtpos = rawdata.find('>', namematch.end())
            self.handle_endtag(tagname)
            return gtpos+1

        elem = match.group(1).lower() # script or style
        if self.cdata_elem is not None:
            if elem != self.cdata_elem:
                self.handle_data(rawdata[i:gtpos])
                return gtpos

        self.handle_endtag(elem)
        self.clear_cdata_mode()
        return gtpos

    # Overridable -- finish processing of start+end tag: <tag.../>
    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    # Overridable -- handle start tag
    def handle_starttag(self, tag, attrs):
        pass

    # Overridable -- handle end tag
    def handle_endtag(self, tag):
        pass

    # Overridable -- handle character reference
    def handle_charref(self, name):
        pass

    # Overridable -- handle entity reference
    def handle_entityref(self, name):
        pass

    # Overridable -- handle data
    def handle_data(self, data):
        pass

    # Overridable -- handle comment
    def handle_comment(self, data):
        pass

    # Overridable -- handle declaration
    def handle_decl(self, decl):
        pass

    # Overridable -- handle processing instruction
    def handle_pi(self, data):
        pass

    def unknown_decl(self, data):
        pass