[Zope-Checkins] CVS: Zope/lib/python/docutils/parsers/rst - __init__.py:1.2 states.py:1.2 tableparser.py:1.2

Andreas Jung andreas@andreas-jung.com
Sat, 1 Feb 2003 04:26:41 -0500


Update of /cvs-repository/Zope/lib/python/docutils/parsers/rst
In directory cvs.zope.org:/tmp/cvs-serv18056/docutils/parsers/rst

Added Files:
	__init__.py states.py tableparser.py 
Log Message:
merge from ajung-restructuredtext-integration-branch


=== Zope/lib/python/docutils/parsers/rst/__init__.py 1.1 => 1.2 ===
--- /dev/null	Sat Feb  1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/__init__.py	Sat Feb  1 04:26:07 2003
@@ -0,0 +1,120 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
+the reStructuredText parser.
+
+
+Usage
+=====
+
+1. Create a parser::
+
+       parser = docutils.parsers.rst.Parser()
+
+   Several optional arguments may be passed to modify the parser's behavior.
+   Please see `Customizing the Parser`_ below for details.
+
+2. Gather input (a multi-line string), by reading a file or the standard
+   input::
+
+       input = sys.stdin.read()
+
+3. Create a new empty `docutils.nodes.document` tree::
+
+       document = docutils.utils.new_document(source, settings)
+
+   See `docutils.utils.new_document()` for parameter details.
+
+4. Run the parser, populating the document tree::
+
+       parser.parse(input, document)
+
+
+Parser Overview
+===============
+
+The reStructuredText parser is implemented as a state machine, examining its
+input one line at a time. To understand how the parser works, please first
+become familiar with the `docutils.statemachine` module, then see the
+`states` module.
+
+
+Customizing the Parser
+----------------------
+
+Anything that isn't already customizable is that way simply because that type
+of customizability hasn't been implemented yet.  Patches welcome!
+
+When instantiating an object of the `Parser` class, two parameters may be
+passed: ``rfc2822`` and ``inliner``.  Pass ``rfc2822=1`` to enable an initial
+RFC-2822 style header block, parsed as a "field_list" element (with "class"
+attribute set to "rfc2822").  Currently this is the only body-level element
+which is customizable without subclassing.  (Tip: subclass `Parser` and change
+its "state_classes" and "initial_state" attributes to refer to new classes.
+Contact the author if you need more details.)
+
+The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
+It handles inline markup recognition.  A common extension is the addition of
+further implicit hyperlinks, like "RFC 2822".  This can be done by subclassing
+`states.Inliner`, adding a new method for the implicit markup, and adding a
+``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
+subclass.  See `states.Inliner.implicit_inline()` for details.  Explicit
+inline markup can be customized in a `states.Inliner` subclass via the
+``patterns.initial`` and ``dispatch`` attributes (and new methods as
+appropriate).
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import docutils.parsers
+import docutils.statemachine
+from docutils.parsers.rst import states
+
+
+class Parser(docutils.parsers.Parser):
+
+    """The reStructuredText parser."""
+
+    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
+    """Aliases this parser supports."""
+
+    settings_spec = (
+        'reStructuredText Parser Options',
+        None,
+        (('Recognize and link to PEP references (like "PEP 258").',
+          ['--pep-references'],
+          {'action': 'store_true'}),
+         ('Recognize and link to RFC references (like "RFC 822").',
+          ['--rfc-references'],
+          {'action': 'store_true'}),
+         ('Set number of spaces for tab expansion (default 8).',
+          ['--tab-width'],
+          {'metavar': '<width>', 'type': 'int', 'default': 8}),))
+
+    def __init__(self, rfc2822=None, inliner=None):
+        if rfc2822:
+            self.initial_state = 'RFC2822Body'
+        else:
+            self.initial_state = 'Body'
+        self.state_classes = states.state_classes
+        self.inliner = inliner
+
+    def parse(self, inputstring, document):
+        """Parse `inputstring` and populate `document`, a document tree."""
+        self.setup_parse(inputstring, document)
+        debug = document.reporter[''].debug
+        self.statemachine = states.RSTStateMachine(
+              state_classes=self.state_classes,
+              initial_state=self.initial_state,
+              debug=debug)
+        inputlines = docutils.statemachine.string2lines(
+              inputstring, tab_width=document.settings.tab_width,
+              convert_whitespace=1)
+        self.statemachine.run(inputlines, document, inliner=self.inliner)
+        self.finish_parse()


=== Zope/lib/python/docutils/parsers/rst/states.py 1.1 => 1.2 === (2729/2829 lines abridged)
--- /dev/null	Sat Feb  1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/states.py	Sat Feb  1 04:26:07 2003
@@ -0,0 +1,2826 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.restructuredtext.states`` module, the core of
+the reStructuredText parser.  It defines the following:
+
+:Classes:
+    - `RSTStateMachine`: reStructuredText parser's entry point.
+    - `NestedStateMachine`: recursive StateMachine.
+    - `RSTState`: reStructuredText State superclass.
+    - `Inliner`: For parsing inline markup.
+    - `Body`: Generic classifier of the first line of a block.
+    - `SpecializedBody`: Superclass for compound element members.
+    - `BulletList`: Second and subsequent bullet_list list_items
+    - `DefinitionList`: Second+ definition_list_items.
+    - `EnumeratedList`: Second+ enumerated_list list_items.
+    - `FieldList`: Second+ fields.
+    - `OptionList`: Second+ option_list_items.
+    - `RFC2822List`: Second+ RFC2822-style fields.
+    - `ExtensionOptions`: Parses directive option fields.
+    - `Explicit`: Second+ explicit markup constructs.
+    - `SubstitutionDef`: For embedded directives in substitution definitions.
+    - `Text`: Classifier of second line of a text block.
+    - `SpecializedText`: Superclass for continuation lines of Text-variants.
+    - `Definition`: Second line of potential definition_list_item.
+    - `Line`: Second line of overlined section title or transition marker.
+    - `Struct`: An auxiliary collection class.
+
+:Exception classes:
+    - `MarkupError`
+    - `ParserError`
+    - `MarkupMismatch`
+
+:Functions:
+    - `escape2null()`: Return a string, escape-backslashes converted to nulls.
+    - `unescape()`: Return a string, nulls removed or restored to backslashes.
+
+:Attributes:
+    - `state_classes`: set of State classes used with `RSTStateMachine`.
+
+Parser Overview
+===============
+

[-=- -=- -=- 2729 lines omitted -=- -=- -=-]

+    def underline(self, match, context, next_state):
+        overline = context[0]
+        blocktext = overline + '\n' + self.state_machine.line
+        lineno = self.state_machine.abs_line_number() - 1
+        if len(overline.rstrip()) < 4:
+            self.short_overline(context, blocktext, lineno, 1)
+        msg = self.reporter.error(
+              'Invalid section title or transition marker.',
+              nodes.literal_block(blocktext, blocktext), line=lineno)
+        self.parent += msg
+        return [], 'Body', []
+
+    def short_overline(self, context, blocktext, lineno, lines=1):
+        msg = self.reporter.info(
+            'Possible incomplete section title.\nTreating the overline as '
+            "ordinary text because it's so short.", line=lineno)
+        self.parent += msg
+        self.state_correction(context, lines)
+
+    def state_correction(self, context, lines=1):
+        self.state_machine.previous_line(lines)
+        context[:] = []
+        raise statemachine.StateCorrection('Body', 'text')
+
+
+state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
+                 OptionList, ExtensionOptions, Explicit, Text, Definition,
+                 Line, SubstitutionDef, RFC2822Body, RFC2822List)
+"""Standard set of State classes used to start `RSTStateMachine`."""
+
+
+def escape2null(text):
+    """Return a string with escape-backslashes converted to nulls."""
+    parts = []
+    start = 0
+    while 1:
+        found = text.find('\\', start)
+        if found == -1:
+            parts.append(text[start:])
+            return ''.join(parts)
+        parts.append(text[start:found])
+        parts.append('\x00' + text[found+1:found+2])
+        start = found + 2               # skip character after escape
+
+def unescape(text, restore_backslashes=0):
+    """Return a string with nulls removed or restored to backslashes."""
+    if restore_backslashes:
+        return text.replace('\x00', '\\')
+    else:
+        return ''.join(text.split('\x00'))


=== Zope/lib/python/docutils/parsers/rst/tableparser.py 1.1 => 1.2 === (433/533 lines abridged)
--- /dev/null	Sat Feb  1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/tableparser.py	Sat Feb  1 04:26:07 2003
@@ -0,0 +1,530 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This module defines table parser classes,which parse plaintext-graphic tables
+and produce a well-formed data structure suitable for building a CALS table.
+
+:Classes:
+    - `GridTableParser`: Parse fully-formed tables represented with a grid.
+    - `SimpleTableParser`: Parse simple tables, delimited by top & bottom
+      borders.
+
+:Exception class: `TableMarkupError`
+
+:Function:
+    `update_dict_of_lists()`: Merge two dictionaries containing list values.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import re
+import sys
+from docutils import DataError
+
+
+class TableMarkupError(DataError): pass
+
+
+class TableParser:
+
+    """
+    Abstract superclass for the common parts of the syntax-specific parsers.
+    """
+
+    head_body_separator_pat = None
+    """Matches the row separator between head rows and body rows."""
+
+    def parse(self, block):
+        """
+        Analyze the text `block` and return a table data structure.
+
+        Given a plaintext-graphic table in `block` (list of lines of text; no
+        whitespace padding), parse the table, construct and return the data

[-=- -=- -=- 433 lines omitted -=- -=- -=-]

+        columns.append((sys.maxint, None))
+        lastcol = len(columns) - 2
+        for i in range(len(columns) - 1):
+            start, end = columns[i]
+            nextstart = columns[i+1][0]
+            block = []
+            margin = sys.maxint
+            for line, offset in lines:
+                if i == lastcol and line[end:].strip():
+                    text = line[start:].rstrip()
+                    columns[lastcol] = (start, start + len(text))
+                    self.adjust_last_column(start + len(text))
+                elif line[end:nextstart].strip():
+                    raise TableMarkupError('Text in column margin at line '
+                                           'offset %s.' % offset)
+                else:
+                    text = line[start:end].rstrip()
+                block.append(text)
+                if text:
+                    margin = min(margin, len(text) - len(text.lstrip()))
+            if 0 < margin < sys.maxint:
+                block = [line[margin:] for line in block]
+            row[i][3].extend(block)
+        self.table.append(row)
+
+    def adjust_last_column(self, new_end):
+        start, end = self.columns[-1]
+        if new_end > end:
+            self.columns[-1] = (start, new_end)
+
+    def structure_from_cells(self):
+        colspecs = [end - start for start, end in self.columns]
+        first_body_row = 0
+        if self.head_body_sep:
+            for i in range(len(self.table)):
+                if self.table[i][0][2] > self.head_body_sep:
+                    first_body_row = i
+                    break
+        return (colspecs, self.table[:first_body_row],
+                self.table[first_body_row:])
+
+
+def update_dict_of_lists(master, newdata):
+    """
+    Extend the list values of `master` with those from `newdata`.
+
+    Both parameters must be dictionaries containing list values.
+    """
+    for key, values in newdata.items():
+        master.setdefault(key, []).extend(values)