[ZPT] CVS: Packages/TAL - nsgmllib.py:1.11

fred@digicool.com fred@digicool.com
Fri, 16 Mar 2001 14:23:55 -0500 (EST)


Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv7143

Modified Files:
	nsgmllib.py 
Log Message:

Change RuntimeError to SGMLParseError, which carries line number and
offset information.

Add support for SGML declaration syntax (<!....>) to some reasonable
degree.  This does not support everything allowed in SGML, but should
work with "real" HTML (internal subset in a DOCTYPE is not handled).
The content of the declaration is passed to the .handle_decl() method,
which can be overridden by subclasses.



--- Updated File nsgmllib.py in package Packages/TAL --
--- nsgmllib.py	2001/03/16 11:55:11	1.10
+++ nsgmllib.py	2001/03/16 19:23:54	1.11
@@ -37,7 +37,22 @@
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
 
+declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
 
+
+class SGMLParseError(Exception):
+    """Exception raised for all parse errors."""
+    def __init__(self, msg, pos=(None, None)):
+        self.msg = msg
+        self.lineno = pos[0]
+        self.offset = pos[1]
+
+    def __str__(self):
+        return ("%s (line %s, offset %s)"
+                % (self.msg, self.lineno, self.offset))
+
+
 # SGML parser base class -- find tags and call handler functions.
 # Usage: p = SGMLParser(); p.feed(data); ...; p.close().
 # The dtd is defined by deriving a class which defines methods
@@ -165,7 +180,11 @@
                         self.handle_data(rawdata[i])
                         i = self.updatepos(i, i+1)
                         continue
-                    k = match.end(0)
+                    # This is some sort of declaration; in "HTML as
+                    # deployed," this should only be the document type
+                    # declaration ("<!DOCTYPE html...>").
+                    k = self.parse_declaration(i)
+                    if k < 0: break
                     i = self.updatepos(i, k)
                     continue
             elif rawdata[i] == '&':
@@ -188,7 +207,7 @@
                     i = self.updatepos(i, k)
                     continue
             else:
-                raise RuntimeError, 'neither < nor & ??'
+                raise SGMLParserError('neither < nor & ??', self.getpos())
             # We get here only if incomplete matches but
             # nothing else
             match = incomplete.match(rawdata, i)
@@ -212,7 +231,8 @@
     def parse_comment(self, i):
         rawdata = self.rawdata
         if rawdata[i:i+4] != '<!--':
-            raise RuntimeError, 'unexpected call to handle_comment'
+            raise SGMLParseError('unexpected call to parse_comment()',
+                                 self.getpos())
         match = commentclose.search(rawdata, i+4)
         if not match:
             return -1
@@ -221,11 +241,44 @@
         j = match.end(0)
         return j-i
 
+    # Internal -- parse declaration.
+    def parse_declaration(self, i):
+        rawdata = self.rawdata
+        j = i + 2
+        # in practice, this should look like: ((name|stringlit) S*)+ '>'
+        while 1:
+            c = rawdata[j:j+1]
+            if c == ">":
+                # end of declaration syntax
+                self.handle_decl(rawdata[i+2:j])
+                return j + 1
+            if c in "\"'":
+                m = declstringlit.match(rawdata, j)
+                if not m:
+                    # incomplete or an error?
+                    return -1
+                j = m.end()
+            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                m = declname.match(rawdata, j)
+                if not m:
+                    # incomplete or an error?
+                    return -1
+                j = m.end()
+            elif i == len(rawdata):
+                # end of buffer between tokens
+                return -1
+            else:
+                raise SGMLParseError(
+                    "unexpected char in declaration: %s" % `rawdata[i]`,
+                    self.getpos())
+        assert 0, "can't get here!"
+
     # Internal -- parse processing instr, return length or -1 if not terminated
     def parse_pi(self, i):
         rawdata = self.rawdata
         if rawdata[i:i+2] != '<?':
-            raise RuntimeError, 'unexpected call to handle_pi'
+            raise SGMLParseError('unexpected call to parse_pi()',
+                                 self.getpos())
         match = piclose.search(rawdata, i+2)
         if not match:
             return -1
@@ -253,7 +306,8 @@
         attrs = []
         match = tagfind.match(rawdata, i+1)
         if not match:
-            raise RuntimeError, 'unexpected call to parse_starttag'
+            raise SGMLParseError('unexpected call to parse_starttag()',
+                                 self.getpos())
         k = match.end(0)
         tag = string.lower(rawdata[i+1:k])
         self.lasttag = tag
@@ -389,6 +443,10 @@
 
     # Example -- handle comment, could be overridden
     def handle_comment(self, data):
+        pass
+
+    # Example -- handle declaration, could be overridden
+    def handle_decl(self, decl):
         pass
 
     # Example -- handle processing instruction, could be overridden