[ZPT] CVS: Packages/TAL - HTMLParser.py:1.6

guido@digicool.com guido@digicool.com
Wed, 21 Mar 2001 18:13:13 -0500 (EST)


Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv19789

Modified Files:
	HTMLParser.py 
Log Message:
Change several "raise HTMLParseError" statements into assertions, as
these are really complaints about the integrity of our own code.
HTMLParseError should be raised only for invalid input.  (Should we
turn more unrecognized constructs into exceptions, e.g. '&' not
followed by an entity or character reference?)

Also added an 'r' prefix to a regex string containing a backslash.



--- Updated File HTMLParser.py in package Packages/TAL --
--- HTMLParser.py	2001/03/21 22:33:58	1.5
+++ HTMLParser.py	2001/03/21 23:13:13	1.6
@@ -23,7 +23,7 @@
 charref = re.compile('&#([0-9]+)[^0-9]')
 
 starttagopen = re.compile('<[a-zA-Z]')
-piopen = re.compile('<\?')
+piopen = re.compile(r'<\?')
 piclose = re.compile('>')
 endtagopen = re.compile('</[a-zA-Z]')
 special = re.compile('<![^<>]*>')
@@ -147,6 +147,7 @@
             if i < j: self.handle_data(rawdata[i:j])
             i = self.updatepos(i, j)
             if i == n: break
+            assert rawdata[i] in "<&", "interesting.search() lied"
             if rawdata[i] == '<':
                 if starttagopen.match(rawdata, i):
                     if self.literal:
@@ -213,8 +214,6 @@
                         k = k-1
                     i = self.updatepos(i, k)
                     continue
-            else:
-                raise HTMLParserError('neither < nor & ??', self.getpos())
             # We get here only if incomplete matches but
             # nothing else
             match = incomplete.match(rawdata, i)
@@ -237,9 +236,7 @@
     # Internal -- parse comment, return length or -1 if not terminated
     def parse_comment(self, i):
         rawdata = self.rawdata
-        if rawdata[i:i+4] != '<!--':
-            raise HTMLParseError('unexpected call to parse_comment()',
-                                 self.getpos())
+        assert rawdata[i:i+4] == '<!--', 'unexpected call to parse_comment()'
         match = commentclose.search(rawdata, i+4)
         if not match:
             return -1
@@ -283,9 +280,7 @@
     # Internal -- parse processing instr, return length or -1 if not terminated
     def parse_pi(self, i):
         rawdata = self.rawdata
-        if rawdata[i:i+2] != '<?':
-            raise HTMLParseError('unexpected call to parse_pi()',
-                                 self.getpos())
+        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
         match = piclose.search(rawdata, i+2)
         if not match:
             return -1
@@ -311,9 +306,7 @@
         # Now parse the data between i+1 and j into a tag and attrs
         attrs = []
         match = tagfind.match(rawdata, i+1)
-        if not match:
-            raise HTMLParseError('unexpected call to parse_starttag()',
-                                 self.getpos())
+        assert match, 'unexpected call to parse_starttag()'
         k = match.end(0)
         self.lasttag = tag = string.lower(rawdata[i+1:k])