[Zope-Checkins] CVS: Zope2 - DocumentClass.py:1.24

chrism@serenade.digicool.com chrism@serenade.digicool.com
Mon, 25 Jun 2001 16:20:30 -0400


Update of /cvs-repository/Zope2/lib/python/StructuredText
In directory serenade:/home/chrism/BackTalk/lib/python/StructuredText

Modified Files:
	DocumentClass.py 
Log Message:
A number of small changes and enhancements to the detection of symbols and gestures:

 - All regexes now attempt to detect \r as well as \n as a newline character.

 - Fixed improper regex for emphasis which caused emphasized text to be rendered as strong.

 - Fixed href method to not return false positives for "comma", link:types.  In many cases, inappropriate things were being detected as comma-link-type hrefs.  Now only things with (http|https|ftp|mailto|file|about) as a leader are used as comma-link-types.

 - Changed ordered list types to not treat small words with following punctuation at the beginning of a paragraph (e.g. "Yes.") as an ordered list gesture.  Instead, only single characters (or any-length digits) followed by punctuation will be treated as an ordered list gesture.

- Removed unused regular expression from doc_header signature.

- Changed doc_literal, doc_emphasize, and doc_strong methods to
  work across newlines.  This means that people can start to type,
  for example, a literal 'on one line and it should be ok to
  ... cross to the next'.  The danger here is that it will match
  too much (especially in the case of literals), but this is
  ameliorated by the fact that (at least for literals) the
  ending "'" is required to be followed by some kind of whitespace.
  Emphasis and strong don't have this limitation, but their symbology
  (*) is much less frequently used in normal writing.



--- Updated File DocumentClass.py in package Zope2 --
--- DocumentClass.py	2001/06/23 03:21:39	1.23
+++ DocumentClass.py	2001/06/25 20:20:30	1.24
@@ -85,7 +85,8 @@
 
 import re, ST, STDOM
 from string import split, join, replace, expandtabs, strip, find, rstrip
-from STletters import letters
+from STletters import letters, digits, literal_punc, under_punc,\
+     strongem_punc, phrase_delimiters
 
 StringType=type('')
 ListType=type([])
@@ -364,7 +365,7 @@
     
     #'doc_inner_link',
     #'doc_named_link',
-    #'doc_underline',
+    #'doc_underline'
     text_types = [
         'doc_sgml',
         'doc_href',
@@ -790,7 +791,7 @@
 
     def doc_numbered(
         self, paragraph,
-        expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
+        expr = re.compile(r'(\s*[%s]\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
         
         # This is the old expression. It had a nasty habit
         # of grabbing paragraphs that began with a single
@@ -838,8 +839,7 @@
            indent=paragraph.indent,
            delim=d)
 
-    def doc_header(self, paragraph,
-                   expr=re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match):
+    def doc_header(self, paragraph):
         subs=paragraph.getSubparagraphs()
         if not subs: return None
         top=paragraph.getColorizableTexts()[0]
@@ -858,11 +858,14 @@
 
     def doc_literal(
         self, s,
-        expr=re.compile(
-          r"(?:\s|^)'"                                                  # open
-          r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
-          r"'(?:\s|[,.;:!?]|$)"                                        # close
-          ).search):
+        expr = re.compile(r"'([%s%s%s\s]+)'[%s]+" % (letters, digits, literal_punc, phrase_delimiters)).search):
+
+        # old expr... failed to cross newlines.
+        #        expr=re.compile(
+        #          r"(?:\s|^)'"           # open
+        #          r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n\r']*[^ \t\n\r\f\v'])" # contents
+        #          r"'(?:\s|[,.;:!?]|$)"  # close
+        #          ).search):
         
         r=expr(s)
         if r:
@@ -873,7 +876,8 @@
 
     def doc_emphasize(
         self, s,
-        expr = re.compile(r'\s*\*([ \n%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search
+        expr = re.compile(r'\*([%s%s%s\s]+?)\*' % (letters, digits, strongem_punc)).search
+        #expr = re.compile(r'\s*\*([ \n\r%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search       # old expr, inconsistent punctuation
         ):
 
         r=expr(s)
@@ -886,7 +890,7 @@
     def doc_inner_link(self,
                        s,
                        expr1 = re.compile(r"\.\.\s*").search,
-                       expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
+                       expr2 = re.compile(r"\[[%s%s]+\]" % (letters, digits) ).search):
         
         # make sure we dont grab a named link
         if expr2(s) and expr1(s):
@@ -920,9 +924,12 @@
     
     def doc_underline(self,
                       s,
-                      expr=re.compile(r"\_([%s0-9\s\.,\?]+)\_" % letters).search):
+                      #expr=re.compile(r"\_([a-zA-Z0-9\s\.,\?]+)\_").search, # old expr, inconsistent punc, failed to cross newlines
+                      expr=re.compile(r'_([%s%s%s\s]+)_' % (letters, digits, under_punc)).search):
         result = expr(s)
         if result:
+            if result.group(1)[:1] == '_':
+               return None # no double unders
             start,end = result.span(1)
             st,e = result.span()
             return (StructuredTextUnderline(s[start:end]),st,e)
@@ -931,7 +938,8 @@
     
     def doc_strong(self,
                    s,
-                   expr = re.compile(r'\s*\*([ \n%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search
+                   expr = re.compile(r'\*\*([%s%s%s\s]+?)\*\*' % (letters, digits, strongem_punc)).search
+                   #expr = re.compile(r'\s*\*\*([ \n\r%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*\*(?!\*|-)' % letters).search, # old expr, inconsistent punc, failed to cross newlines.
         ):
 
         r=expr(s)
@@ -942,8 +950,8 @@
            return None
 
     ## Some constants to make the doc_href() regex easier to read.
-    _DQUOTEDTEXT = r'("[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text
-    _URL_AND_PUNC = r'([%s0-9_\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
+    _DQUOTEDTEXT = r'("[ %s0-9\n\r\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text
+    _URL_AND_PUNC = r'((http|https|ftp|mailto|file|about)[:/]+?[%s0-9_\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
     _SPACES = r'(\s*)'
     
     def doc_href(self, s,
@@ -989,7 +997,7 @@
 
 
     def doc_xref(self, s,
-        expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
+        expr = re.compile('\[([%s0-9\-.:/;,\n\r\~]+)\]' % letters).search
         ):
         r = expr(s)
         if r: