[Zope3-checkins] SVN: Zope3/branches/testbrowser-integration/src/zope/testbrowser/ add a copy of mechanize with Stephan's optimizations

Benji York benji at zope.com
Thu Aug 25 13:05:52 EDT 2005


Log message for revision 38090:
  add a copy of mechanize with Stephan's optimizations
  

Changed:
  U   Zope3/branches/testbrowser-integration/src/zope/testbrowser/__init__.py
  A   Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/
  A   Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/__init__.py
  A   Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_mechanize.py
  A   Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_useragent.py
  U   Zope3/branches/testbrowser-integration/src/zope/testbrowser/testing.py

-=-
Modified: Zope3/branches/testbrowser-integration/src/zope/testbrowser/__init__.py
===================================================================
--- Zope3/branches/testbrowser-integration/src/zope/testbrowser/__init__.py	2005-08-25 16:33:25 UTC (rev 38089)
+++ Zope3/branches/testbrowser-integration/src/zope/testbrowser/__init__.py	2005-08-25 17:05:52 UTC (rev 38090)
@@ -29,6 +29,8 @@
     sys.modules['ClientForm'] = ClientForm
 else:
     assert sys.modules['ClientForm'] is ClientForm
+import ClientForm as x
+assert x is ClientForm
 
 # stitch in pullparser
 from zope.testbrowser import pullparser
@@ -37,6 +39,19 @@
     sys.modules['pullparser'] = pullparser
 else:
     assert sys.modules['pullparser'] is pullparser
+import pullparser as x
+assert x is pullparser
 # end TODO
 
+# stitch in mechanize
+from zope.testbrowser import mechanize
+
+if 'mechanize' not in sys.modules:
+    sys.modules['mechanize'] = mechanize
+else:
+    assert sys.modules['mechanize'] is mechanize
+import mechanize as x
+assert x is mechanize
+# end TODO
+
 from testing import Browser
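
A minimal sketch (not part of this checkin) of what the stitching above buys us: once zope.testbrowser has been imported, the bare module names resolve to the bundled copies, so the copied mechanize sources can keep saying "import ClientForm", "import pullparser", and "import mechanize" unchanged.  The assertions assume no other copies of these packages were imported first:

from zope import testbrowser
import mechanize, ClientForm, pullparser

# each bare name is an alias for the copy shipped inside zope.testbrowser
assert mechanize is testbrowser.mechanize
assert ClientForm is testbrowser.ClientForm
assert pullparser is testbrowser.pullparser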

Added: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/__init__.py
===================================================================
--- Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/__init__.py	2005-08-25 16:33:25 UTC (rev 38089)
+++ Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/__init__.py	2005-08-25 17:05:52 UTC (rev 38090)
@@ -0,0 +1,4 @@
+from _useragent import UserAgent#, http_get, http_put, http_head
+from _mechanize import Browser, Link, \
+     BrowserStateError, LinkNotFoundError, FormNotFoundError, \
+     __version__


Property changes on: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/__init__.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Added: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_mechanize.py
===================================================================
--- Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_mechanize.py	2005-08-25 16:33:25 UTC (rev 38089)
+++ Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_mechanize.py	2005-08-25 17:05:52 UTC (rev 38090)
@@ -0,0 +1,519 @@
+"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
+
+Copyright 2003-2004 John J. Lee <jjl at pobox.com>
+Copyright 2003 Andy Lester (original Perl code)
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD License (see the file COPYING included with the
+distribution).
+
+"""
+
+# XXX
+# The stuff on the web page's todo list.
+# Moof's emails about response object, .back(), etc.
+# Add Browser.load_response() method.
+# Add Browser.form_as_string() and Browser.__str__() methods.
+
+import urlparse, re
+
+import ClientCookie
+from ClientCookie._Util import response_seek_wrapper
+from ClientCookie._HeadersUtil import split_header_words
+from ClientCookie._urllib2_support import HTTPRequestUpgradeProcessor
+import ClientForm
+import pullparser
+# serves me right for not using a version tuple...
+VERSION_RE = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<bugfix>\d+)"
+                        r"(?P<state>[ab])?(?:-pre)?(?P<pre>\d+)?$")
+def parse_version(text):
+    m = VERSION_RE.match(text)
+    if m is None:
+        raise ValueError
+    return tuple([m.groupdict()[part] for part in
+                  ("major", "minor", "bugfix", "state", "pre")])
+assert map(int, parse_version(ClientCookie.VERSION)[:3]) >= [1, 0, 2], \
+       "ClientCookie 1.0.2 or newer is required"
+assert map(int, parse_version(ClientForm.VERSION)[:2]) >= [0, 1], \
+       "ClientForm 0.1.x is required"
+assert pullparser.__version__[:3] >= (0, 0, 4), \
+       "pullparser 0.0.4b or newer is required"
+del VERSION_RE, parse_version
+
+from _useragent import UserAgent
+
+__version__ = (0, 0, 9, "a", None)  # 0.0.9a
+
+class BrowserStateError(Exception): pass
+class LinkNotFoundError(Exception): pass
+class FormNotFoundError(Exception): pass
+
+class Link:
+    def __init__(self, base_url, url, text, tag, attrs):
+        assert None not in [url, tag, attrs]
+        self.base_url = base_url
+        self.absolute_url = urlparse.urljoin(base_url, url)
+        self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
+    def __eq__(self, other):
+        try:
+            for name in "url", "text", "tag", "attrs":
+                if getattr(self, name) != getattr(other, name):
+                    return False
+        except AttributeError:
+            return False
+        return True
+    def __repr__(self):
+        return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
+            self.base_url, self.url, self.text, self.tag, self.attrs)
+
+class Browser(UserAgent):
+    """Browser-like class with support for history, forms and links.
+
+    BrowserStateError is raised whenever the browser is in the wrong state to
+    complete the requested operation - eg., when .back() is called when the
+    browser history is empty, or when .follow_link() is called when the current
+    response does not contain HTML data.
+
+    Public attributes:
+
+    request: last request (ClientCookie.Request or urllib2.Request)
+    form: currently selected form (see .select_form())
+    default_encoding: character encoding used for encoding numeric character
+     references when matching link text, if no encoding is found in the response
+     (you should turn on HTTP-EQUIV handling if you want the best chance of
+     getting this right without resorting to this default)
+
+    """
+    urltags = {
+        "a": "href",
+        "area": "href",
+        "frame": "src",
+        "iframe": "src",
+    }
+
+    def __init__(self, default_encoding="latin-1"):
+        self.default_encoding = default_encoding
+        self._history = []  # LIFO
+        self.request = self._response = None
+        self.form = None
+        self._forms = None
+        self._title = None
+        self._links = None
+        UserAgent.__init__(self)  # do this last to avoid __getattr__ problems
+
+    def close(self):
+        UserAgent.close(self)
+        self._history = self._forms = self._title = self._links = None
+        self.request = self._response = None
+
+    def open(self, url, data=None): return self._mech_open(url, data)
+
+    def _mech_open(self, url, data=None, update_history=True):
+        if not hasattr(url, 'get_full_url'):
+            # string URL -- convert to absolute URL if required
+            scheme, netloc = urlparse.urlparse(url)[:2]
+            if not scheme:
+                # relative URL
+                assert not netloc, "malformed URL"
+                if self._response is None:
+                    raise BrowserStateError(
+                        "can't fetch relative URL: not viewing any document")
+                url = urlparse.urljoin(self._response.geturl(), url)
+
+        if update_history and self.request is not None:
+            self._history.append((self.request, self._response))
+        self._response = None
+        # we want self.request to be assigned even if OpenerDirector.open fails
+        self.request = self._request(url, data)
+        self._previous_scheme = self.request.get_type()
+
+        self._response = ClientCookie.OpenerDirector.open(
+            self, self.request, data)
+        if not hasattr(self._response, "seek"):
+            self._response = response_seek_wrapper(self._response)
+        self._parse_html(self._response)
+
+        return self._response
+
+    def response(self):
+        """Return last response (as return value of urllib2.urlopen())."""
+        # XXX This is currently broken: responses returned by this method
+        # all share the same seek position.
+        return self._response
+
+    def geturl(self):
+        """Get URL of current document."""
+        if self._response is None:
+            raise BrowserStateError("not viewing any document")
+        return self._response.geturl()
+
+    def reload(self):
+        """Reload current document, and return response object."""
+        if self.request is None:
+            raise BrowserStateError("no URL has yet been .open()ed")
+        return self._mech_open(self.request, update_history=False)
+
+    def back(self, n=1):
+        """Go back n steps in history, and return response object.
+
+        n: go back this number of steps (default 1 step)
+
+        """
+        while n:
+            try:
+                self.request, self._response = self._history.pop()
+            except IndexError:
+                raise BrowserStateError("already at start of history")
+            n -= 1
+        if self._response is not None:
+            self._parse_html(self._response)
+        return self._response
+
+    def links(self, *args, **kwds):
+        """Return iteratable over links (mechanize.Link objects)."""
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        if args:
+            raise ValueError("keyword arguments only, please!")
+        if kwds:
+            return self._find_links(False, **kwds)
+        if self._links is None:
+            self.build_links()
+        return self._links
+
+    def build_links(self):
+        base = self._response.geturl()
+        self._response.seek(0)
+        p = pullparser.PullParser(
+            self._response, encoding=self._encoding(self._response))
+        self._links = []
+        for token in p.tags(*(self.urltags.keys()+["base"])):
+            if token.data == "base":
+                base = dict(token.attrs).get("href")
+                continue
+            if token.type == "endtag":
+                continue
+            attrs = dict(token.attrs)
+            tag = token.data
+            name = attrs.get("name")
+            text = None
+            url = attrs.get(self.urltags[tag])
+            if tag == "a":
+                if token.type != "startendtag":
+                    # XXX hmm, this'd break if end tag is missing
+                    text = p.get_compressed_text(("endtag", tag))
+                # but this doesn't work for eg. <a href="blah"><b>Andy</b></a>
+                #text = p.get_compressed_text()
+                # This is a hack from WWW::Mechanize to get some really basic
+                # JavaScript working, which I'm not yet convinced is a good
+                # idea.
+##                 onClick = attrs["onclick"]
+##                 m = re.search(r"/^window\.open\(\s*'([^']+)'/", onClick)
+##                 if onClick and m:
+##                     url = m.group(1)
+            if not url:
+                # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
+                # For our purposes a link is something with a URL, so ignore
+                # this.
+                continue
+
+            link = Link(base, url, text, tag, token.attrs)
+            self._links.append(link)
+        self._response.seek(0)
+
+
+    def forms(self):
+        """Return iteratable over forms.
+
+        The returned form objects implement the ClientForm.HTMLForm interface.
+
+        """
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        if self._forms is None:
+            self._response.seek(0)
+            self._forms = ClientForm.ParseResponse(self._response)
+            self._response.seek(0)
+        return self._forms
+
+    def viewing_html(self):
+        """Return whether the current response contains HTML data."""
+        if self._response is None:
+            raise BrowserStateError("not viewing any document")
+        ct = self._response.info().getheaders("content-type")
+        return ct and ct[0].startswith("text/html")
+
+    def title(self):
+        """Return title, or None if there is no title element in the document.
+
+        Tags are stripped or textified as described in docs for
+        PullParser.get_text() method of pullparser module.
+
+        """
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        if self._title is None:
+            p = pullparser.PullParser(self._response,
+                                      encoding=self._encoding(self._response))
+            try:
+                p.get_tag("title")
+            except pullparser.NoMoreTokensError:
+                pass
+            else:
+                self._title = p.get_text()
+        return self._title
+
+    def select_form(self, name=None, predicate=None, nr=None):
+        """Select an HTML form for input.
+
+        This is like giving a form the "input focus" in a browser.
+
+        If a form is selected, the object supports the HTMLForm interface, so
+        you can call methods like .set_value(), .set(), and .click().
+
+        At least one of the name, predicate and nr arguments must be supplied.
+        If no matching form is found, mechanize.FormNotFoundError is raised.
+
+        If name is specified, then the form must have the indicated name.
+
+        If predicate is specified, then the form must match that function.  The
+        predicate function is passed the HTMLForm as its single argument, and
+        should return a boolean value indicating whether the form matched.
+
+        nr, if supplied, is the sequence number of the form (where 0 is the
+        first).  Note that form 0 is the first form matching all the other
+        arguments (if supplied); it is not necessarily the first form in the
+        document.
+
+        """
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        if (name is None) and (predicate is None) and (nr is None):
+            raise ValueError(
+                "at least one argument must be supplied to specify form")
+
+        orig_nr = nr
+        for form in self.forms():
+            if name is not None and name != form.name:
+                continue
+            if predicate is not None and not predicate(form):
+                continue
+            if nr:
+                nr -= 1
+                continue
+            self.form = form
+            break  # success
+        else:
+            # failure
+            description = []
+            if name is not None: description.append("name '%s'" % name)
+            if predicate is not None:
+                description.append("predicate %s" % predicate)
+            if orig_nr is not None: description.append("nr %d" % orig_nr)
+            description = ", ".join(description)
+            raise FormNotFoundError("no form matching "+description)
+
+    def _add_referer_header(self, request):
+        if self.request is None:
+            return request
+        scheme = request.get_type()
+        previous_scheme = self.request.get_type()
+        if scheme not in ["http", "https"]:
+            return request
+        request = HTTPRequestUpgradeProcessor().http_request(request)  # yuck
+
+        if (self._handle_referer and
+            previous_scheme in ["http", "https"] and not
+            (previous_scheme == "https" and scheme != "https")):
+            request.add_unredirected_header("Referer",
+                                            self.request.get_full_url())
+        return request
+
+    def click(self, *args, **kwds):
+        """See ClientForm.HTMLForm.click for documentation."""
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        request = self.form.click(*args, **kwds)
+        return self._add_referer_header(request)
+
+    def submit(self, *args, **kwds):
+        """Submit current form.
+
+        Arguments are as for ClientForm.HTMLForm.click().
+
+        """
+        return self.open(self.click(*args, **kwds))
+
+    def click_link(self, link=None, **kwds):
+        """Find a link and return a Request object for it.
+
+        Arguments are as for .find_link(), except that a link may be supplied
+        as the first argument.
+
+        """
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+        if not link:
+            link = self.find_link(**kwds)
+        else:
+            if kwds:
+                raise ValueError(
+                    "either pass a Link, or keyword arguments, not both")
+        request = ClientCookie.Request(link.absolute_url)
+        return self._add_referer_header(request)
+
+    def follow_link(self, link=None, **kwds):
+        """Find a link and .open() it.
+
+        Arguments are as for .click_link().
+
+        """
+        return self.open(self.click_link(link, **kwds))
+
+    def find_link(self, *args, **kwds):
+        """Find a link in current page.
+
+        Links are returned as mechanize.Link objects.
+
+        # Return third link that .search()-matches the regexp "python"
+        # (by ".search()-matches", I mean that the regular expression method
+        # .search() is used, rather than .match()).
+        find_link(text_regex=re.compile("python"), nr=2)
+
+        # Return first http link in the current page that points to somewhere
+        # on python.org whose link text (after tags have been removed) is
+        # exactly "monty python".
+        find_link(text="monty python",
+                  url_regex=re.compile("http.*python.org"))
+
+        # Return first link with exactly three HTML attributes.
+        find_link(predicate=lambda link: len(link.attrs) == 3)
+
+        Links include anchors (<a>), image maps (<area>), and frames (<frame>,
+        <iframe>).
+
+        All arguments must be passed by keyword, not position.  Zero or more
+        arguments may be supplied.  In order to find a link, all arguments
+        supplied must match.
+
+        If a matching link is not found, mechanize.LinkNotFoundError is raised.
+
+        text: link text between link tags: eg. <a href="blah">this bit</a> (as
+         returned by pullparser.get_compressed_text(), ie. without tags but
+         with opening tags "textified" as per the pullparser docs) must compare
+         equal to this argument, if supplied
+        text_regex: link text between tags (as defined above) must match the
+         regular expression object passed as this argument, if supplied
+        name, name_regex: as for text and text_regex, but matched against the
+         name HTML attribute of the link tag
+        url, url_regex: as for text and text_regex, but matched against the
+         URL of the link tag (note this matches against Link.url, which is a
+         relative or absolute URL according to how it was written in the HTML)
+        tag: element name of opening tag, eg. "a"
+        predicate: a function taking a Link object as its single argument,
+         returning a boolean result that indicates whether the link matched
+        nr: matches the nth link that matches all other criteria (default 0)
+
+        """
+        if args:
+            raise ValueError("keyword arguments only, please!")
+        return self._find_links(True, **kwds)
+
+    def __getattr__(self, name):
+        # pass through ClientForm / DOMForm methods and attributes
+        if self.form is not None:
+            try: return getattr(self.form, name)
+            except AttributeError: pass
+        raise AttributeError("%s instance has no attribute %s "
+                             "(perhaps you forgot to .select_form()?" %
+                             (self.__class__, name))
+
+#---------------------------------------------------
+# Private methods.
+
+    def _find_links(self, single,
+                    text=None, text_regex=None,
+                    name=None, name_regex=None,
+                    url=None, url_regex=None,
+                    tag=None,
+                    predicate=None,
+                    nr=0
+                    ):
+        if not self.viewing_html():
+            raise BrowserStateError("not viewing HTML")
+
+        links = []
+        orig_nr = nr
+
+        if self._links is None:
+            self.build_links()
+
+        for link in self._links:
+            if url is not None and url != link.url:
+                continue
+            if url_regex is not None and not url_regex.search(link.url):
+                continue
+            if (text is not None and
+                (link.text is None or text != link.text)):
+                continue
+            if (text_regex is not None and
+                (link.text is None or not text_regex.search(link.text))):
+                continue
+            if name is not None and name != dict(link.attrs).get("name"):
+                continue
+            if name_regex is not None:
+                link_name = dict(link.attrs).get("name")
+                if link_name is None or not name_regex.search(link_name):
+                    continue
+            if tag is not None and tag != link.tag:
+                continue
+            if predicate is not None and not predicate(link):
+                continue
+            if nr:
+                nr -= 1
+                continue
+            if single:
+                return link
+            else:
+                links.append(link)
+                nr = orig_nr
+        if not links:
+            raise LinkNotFoundError()
+        return links
+
+    def _encoding(self, response):
+        # HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
+        # headers may be in the response.
+        ct_headers = response.info().getheaders("content-type")
+        if not ct_headers:
+            return self.default_encoding
+
+        # sometimes servers return multiple HTTP headers: take the first
+        http_ct = ct_headers[0]
+        for k, v in split_header_words([http_ct])[0]:
+            if k == "charset":
+                return v
+
+        # no HTTP-specified encoding, so look in META HTTP-EQUIV headers,
+        # which, if present, will be last
+        if len(ct_headers) > 1:
+            equiv_ct = ct_headers[-1]
+            for k, v in split_header_words([equiv_ct])[0]:
+                if k == "charset":
+                    return v
+        return self.default_encoding
+
+    def _parse_html(self, response):
+        self.form = None
+        self._title = None
+        if not self.viewing_html():
+            # nothing to see here
+            return
+
+        # set ._forms, ._links
+        #self._forms = ClientForm.ParseResponse(self._response)
+        self._forms = None
+        response.seek(0)
+        self._links = None
+
+        #response.seek(0)


Property changes on: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_mechanize.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native
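
A rough usage sketch of the Browser class added above (not part of this checkin; the URL, form field, and link text are hypothetical):

import re
from zope.testbrowser.mechanize import Browser, LinkNotFoundError

browser = Browser()
browser.open("http://localhost:8080/index.html")  # first URL must be absolute
print browser.title()                    # None if the page has no <title>

browser.select_form(nr=0)                # give the first form "input focus"
browser.set_value("bob", name="user")    # passed through to ClientForm.HTMLForm
response = browser.submit()              # .click() the form, then .open() it

try:
    browser.follow_link(text_regex=re.compile("[Ll]og ?out"))
except LinkNotFoundError:
    pass                                 # no matching link on this page
browser.back()                           # pop one step off the LIFO history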

Added: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_useragent.py
===================================================================
--- Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_useragent.py	2005-08-25 16:33:25 UTC (rev 38089)
+++ Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_useragent.py	2005-08-25 17:05:52 UTC (rev 38090)
@@ -0,0 +1,382 @@
+"""Convenient HTTP UserAgent class.
+
+This is a subclass of urllib2.OpenerDirector.
+
+
+Copyright 2003 John J. Lee <jjl at pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD License (see the file COPYING included with the
+distribution).
+
+"""
+
+import urllib2, httplib
+import ClientCookie
+from ClientCookie import OpenerDirector, BaseHandler
+
+class HTTPRefererProcessor(BaseHandler):
+    def http_request(self, request):
+        # See RFC 2616 14.36.  The only times we know the source of the
+        # request URI has a URI associated with it are redirect, and
+        # Browser.click() / Browser.submit() / Browser.follow_link().
+        # Otherwise, it's the user's job to add any Referer header before
+        # .open()ing.
+        if hasattr(request, "redirect_dict"):
+            request = self.parent._add_referer_header(request)
+        return request
+
+    https_request = http_request
+
+
+class UserAgent(OpenerDirector):
+    """Convenient user-agent class.
+
+    Do not use .add_handler() to add a handler for something already dealt with
+    by this code.
+
+    Public attributes:
+
+    addheaders: list of (name, value) pairs specifying headers to send with
+     every request, unless they are overridden in the Request instance.
+
+     >>> ua = UserAgent()
+     >>> ua.addheaders = [
+     ...  ("User-agent", "Mozilla/5.0 (compatible)"),
+     ...  ("From", "responsible.person at example.com")]
+
+    """
+
+    handler_classes = {
+        # scheme handlers
+        "http": ClientCookie.HTTPHandler,
+        "ftp": urllib2.FTPHandler,  # CacheFTPHandler is buggy in 2.3
+        "file": urllib2.FileHandler,
+        "gopher": urllib2.GopherHandler,
+        # XXX etc.
+
+        # other handlers
+        "_unknown": urllib2.UnknownHandler,
+        # HTTP{S,}Handler depend on HTTPErrorProcessor too
+        "_http_error": ClientCookie.HTTPErrorProcessor,
+        "_http_request_upgrade": ClientCookie.HTTPRequestUpgradeProcessor,
+        "_http_default_error": urllib2.HTTPDefaultErrorHandler,
+
+        # feature handlers
+        "_authen": urllib2.HTTPBasicAuthHandler,
+        # XXX rest of authentication stuff
+        "_redirect": ClientCookie.HTTPRedirectHandler,
+        "_cookies": ClientCookie.HTTPCookieProcessor,
+        "_refresh": ClientCookie.HTTPRefreshProcessor,
+        "_referer": HTTPRefererProcessor,  # from this module, note
+        "_equiv": ClientCookie.HTTPEquivProcessor,
+        "_seek": ClientCookie.SeekableProcessor,
+        "_proxy": urllib2.ProxyHandler,
+        # XXX there's more to proxies, too
+
+        # debug handlers
+        "_debug_redirect": ClientCookie.HTTPRedirectDebugProcessor,
+        "_debug_response_body": ClientCookie.HTTPResponseDebugProcessor,
+        }
+
+    default_schemes = ["http", "ftp", "file", "gopher"]
+    default_others = ["_unknown", "_http_error", "_http_request_upgrade",
+                      "_http_default_error"]
+    default_features = ["_authen", "_redirect", "_cookies", "_seek", "_proxy"]
+    if hasattr(httplib, 'HTTPS'):
+        handler_classes["https"] = ClientCookie.HTTPSHandler
+        default_schemes.append("https")
+    if hasattr(ClientCookie, "HTTPRobotRulesProcessor"):
+        handler_classes["_robots"] = ClientCookie.HTTPRobotRulesProcessor
+        default_features.append("_robots")
+
+    def __init__(self):
+        OpenerDirector.__init__(self)
+
+        self._ua_handlers = {}
+        for scheme in (self.default_schemes+
+                       self.default_others+
+                       self.default_features):
+            klass = self.handler_classes[scheme]
+            self._ua_handlers[scheme] = klass()
+        for handler in self._ua_handlers.itervalues():
+            self.add_handler(handler)
+
+        # special case, requires extra support from mechanize.Browser
+        self._handle_referer = True
+
+    def close(self):
+        OpenerDirector.close(self)
+        self._ua_handlers = None
+
+    # XXX
+##     def set_timeout(self, timeout):
+##         self._timeout = timeout
+##     def set_http_connection_cache(self, conn_cache):
+##         self._http_conn_cache = conn_cache
+##     def set_ftp_connection_cache(self, conn_cache):
+##         # XXX ATM, FTP has cache as part of handler; should it be separate?
+##         self._ftp_conn_cache = conn_cache
+
+    def set_handled_schemes(self, schemes):
+        """Set sequence of protocol scheme strings.
+
+        If this fails (with ValueError) because you've passed an unknown
+        scheme, the set of handled schemes WILL be updated, but schemes in the
+        list that come after the unknown scheme won't be handled.
+
+        """
+        want = {}
+        for scheme in schemes:
+            if scheme.startswith("_"):
+                raise ValueError("invalid scheme '%s'" % scheme)
+            want[scheme] = None
+
+        # get rid of scheme handlers we don't want
+        for scheme, oldhandler in self._ua_handlers.items():
+            if scheme.startswith("_"): continue  # not a scheme handler
+            if scheme not in want:
+                self._replace_handler(scheme, None)
+            else:
+                del want[scheme]  # already got it
+        # add the scheme handlers that are missing
+        for scheme in want.keys():
+            if scheme not in self.handler_classes:
+                raise ValueError("unknown scheme '%s'")
+            self._set_handler(scheme, True)
+
+    def _add_referer_header(self, request):
+        raise NotImplementedError(
+            "this class can't do HTTP Referer: use mechanize.Browser instead")
+
+    def set_cookiejar(self, cookiejar):
+        """Set a ClientCookie.CookieJar, or None."""
+        self._set_handler("_cookies", obj=cookiejar)
+    def set_credentials(self, credentials):
+        """Set a urllib2.HTTPPasswordMgr, or None."""
+        # XXX use Greg Stein's httpx instead?
+        self._set_handler("_authen", obj=credentials)
+
+    # these methods all take a boolean parameter
+    def set_handle_robots(self, handle):
+        """Set whether to observe rules from robots.txt."""
+        self._set_handler("_robots", handle)
+    def set_handle_redirect(self, handle):
+        """Set whether to handle HTTP Refresh headers."""
+        self._set_handler("_redirect", handle)
+    def set_handle_refresh(self, handle):
+        """Set whether to handle HTTP Refresh headers."""
+        self._set_handler("_refresh", handle)
+    def set_handle_equiv(self, handle):
+        """Set whether to treat HTML http-equiv headers like HTTP headers.
+
+        Response objects will be .seek()able if this is set.
+
+        """
+        self._set_handler("_equiv", handle)
+    def set_handle_referer(self, handle):
+        """Set whether to add Referer header to each request.
+
+        This base class does not implement this feature (so don't turn this on
+        if you're using this base class directly), but the subclass
+        mechanize.Browser does.
+
+        """
+        self._set_handler("_referer", handle)
+        self._handle_referer = bool(handle)
+    def set_seekable_responses(self, handle):
+        """Make response objects .seek()able."""
+        self._set_handler("_seek", handle)
+    def set_debug_redirects(self, handle):
+        """Print information about HTTP redirects.
+
+        This includes refreshes, which show up as faked 302 redirections at the
+        moment.
+
+        """
+        self._set_handler("_debug_redirect", handle)
+    def set_debug_responses(self, handle):
+        """Print HTTP response bodies."""
+        self._set_handler("_debug_response_body", handle)
+    def set_debug_http(self, handle):
+        """Print HTTP headers."""
+        level = int(bool(handle))
+        for scheme in "http", "https":
+            h = self._ua_handlers.get(scheme)
+            if h is not None:
+                h.set_http_debuglevel(level)
+
+    def _set_handler(self, name, handle=None, obj=None):
+        if handle is None:
+            handle = obj is not None
+        if handle:
+            handler_class = self.handler_classes[name]
+            if obj is not None:
+                newhandler = handler_class(obj)
+            else:
+                newhandler = handler_class()
+        else:
+            newhandler = None
+        self._replace_handler(name, newhandler)
+
+    # XXXX I'd *really* rather get rid of this and just rebuild every time.
+    #  This is fragile to base class changes, and hard to understand.
+    #  Have to make sure there's no state directly stored in handlers, though,
+    #  and have appropriate methods for adding state back to the cookie etc.
+    #  handlers known to this class (only the ones in urllib2 / ClientCookie --
+    #  no need to care about other peoples' as long as it's documented that
+    #  calling the set_* methods will in general clobber handler state).
+    def _replace_handler(self, name, newhandler=None):
+        # first, if handler was previously added, remove it
+        if name is not None:
+            try:
+                handler = self._ua_handlers[name]
+            except KeyError:
+                pass
+            else:
+                for table in (
+                    [self.handle_open,
+                     self.process_request, self.process_response]+
+                    self.handle_error.values()):
+                    for handlers in table.values():
+                        remove(handlers, handler)
+                    remove(self.handlers, handler)
+        # then add the replacement, if any
+        if newhandler is not None:
+            self.add_handler(newhandler)
+            self._ua_handlers[name] = newhandler
+
+def remove(sequence, obj):
+    # for use when we can't use .remove() because of obj.__cmp__ :-(
+    # (ClientCookie only requires Python 2.0, which doesn't have __lt__)
+    i = 0
+    while i < len(sequence):
+        if sequence[i] is obj:
+            del sequence[i]
+        else:
+            i += 1
+
+# XXX
+# This is urllib2.Request with a new .set_method() method,
+# for HTTP HEAD / PUT -- move into ClientCookie if/when need it.
+# Maybe it should have a constructor arg, too.
+## class Request:
+
+##     def __init__(self, url, data=None, headers={}):
+##         # unwrap('<URL:type://host/path>') --> 'type://host/path'
+##         self.__original = unwrap(url)
+##         self.type = None
+##         # self.__r_type is what's left after doing the splittype
+##         self.host = None
+##         self.port = None
+##         self.data = data
+##         self.headers = {}
+##         for key, value in headers.items():
+##             self.add_header(key, value)
+##         if data is None:
+##             self._method = "GET"
+##         else:
+##             self._method = "POST"
+
+##     def __getattr__(self, attr):
+##         # XXX this is a fallback mechanism to guard against these
+##         # methods getting called in a non-standard order.  this may be
+##         # too complicated and/or unnecessary.
+##         # XXX should the __r_XXX attributes be public?
+##         if attr[:12] == '_Request__r_':
+##             name = attr[12:]
+##             if hasattr(Request, 'get_' + name):
+##                 getattr(self, 'get_' + name)()
+##                 return getattr(self, attr)
+##         raise AttributeError, attr
+
+##     def get_method(self):
+##         return self._method
+
+##     def set_method(self, method):
+##         if method == "POST":
+##             if data is None:
+##                 data = ""
+##         else:
+##             self.data = None
+##         self._method == method
+
+##     def add_data(self, data):
+##         self.data = data
+
+##     def has_data(self):
+##         return self.data is not None
+
+##     def get_data(self):
+##         return self.data
+
+##     def get_full_url(self):
+##         return self.__original
+
+##     def get_type(self):
+##         if self.type is None:
+##             self.type, self.__r_type = splittype(self.__original)
+##             if self.type is None:
+##                 raise ValueError, "unknown url type: %s" % self.__original
+##         return self.type
+
+##     def get_host(self):
+##         if self.host is None:
+##             self.host, self.__r_host = splithost(self.__r_type)
+##             if self.host:
+##                 self.host = unquote(self.host)
+##         return self.host
+
+##     def get_selector(self):
+##         return self.__r_host
+
+##     def set_proxy(self, host, type):
+##         self.host, self.type = host, type
+##         self.__r_host = self.__original
+
+##     def add_header(self, key, val):
+##         # useful for something like authentication
+##         self.headers[key.capitalize()] = val
+
+
+## def http_get(fullurl, ranges=None, conditions=None):
+##     """HTTP GET, with convenient partial fetches (ranges).
+
+##     XXX conditional fetches?
+
+##     ranges: sequence of pairs of byte ranges (start, end) to fetch;
+
+##     Ranges follow the usual Python rules (the start byte is included,
+##     the end byte is not; negative numbers count back from the end of
+##     the entity; start None means start of entity; end None means end of
+##     entity).  There are restrictions, though: end must not be negative,
+##     and if start is negative, end must be None.
+
+##     >>> http_get("http://www.example.com/big.dat",
+##                  [(0, 10), (-10, None)])  # first and last 10 bytes
+##     >>> http_get("http://www.example.com/big.dat",
+##                  [(50000, None)])  # from byte 50000 to the end
+
+##     """
+##     if conditions: raise NotImplementedError("conditions not yet implemented")
+##     req = self._request(fullurl, data)
+##     assert req.get_type() == "http", "http_get for non-HTTP URI"
+##     rs = []
+##     for start, end in ranges:
+##         if start < 0:
+##             assert end is None, "invalid range"
+##             start = ""
+##         else:
+##             assert 0 <= start <= end, "invalid range"
+##             if start == end: continue
+##             end = end - 1
+##         rs.append("%s-%s" % range)
+##     req.add_header(("Range", "bytes=" % string.join(rs, ", ")))
+##     return self.open(req)
+
+## def http_head(self, fullurl):
+##     raise NotImplementedError()  # XXX
+
+## def http_put(self, fullurl, data=None):
+##     # XXX what about 30x handling?
+##     raise NotImplementedError()  # XXX


Property changes on: Zope3/branches/testbrowser-integration/src/zope/testbrowser/mechanize/_useragent.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native
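
A short sketch of how the handler-toggle API above is meant to be driven (not part of this checkin; the scheme list and header value are just examples):

from zope.testbrowser.mechanize import Browser

ua = Browser()
ua.set_handled_schemes(["http", "file"])  # drop ftp/gopher support
ua.set_handle_equiv(True)       # treat <meta http-equiv> like HTTP headers
ua.set_handle_refresh(False)    # don't follow HTTP Refresh automatically
ua.set_handle_redirect(True)    # keep 30x handling (on by default)
ua.set_debug_http(True)         # print HTTP headers for each request
ua.addheaders = [("User-agent", "Mozilla/5.0 (compatible; example)")]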

Modified: Zope3/branches/testbrowser-integration/src/zope/testbrowser/testing.py
===================================================================
--- Zope3/branches/testbrowser-integration/src/zope/testbrowser/testing.py	2005-08-25 16:33:25 UTC (rev 38089)
+++ Zope3/branches/testbrowser-integration/src/zope/testbrowser/testing.py	2005-08-25 17:05:52 UTC (rev 38090)
@@ -64,7 +64,6 @@
         # ``HTTPCaller`` wants. 
         request_string = (method + ' ' + url + ' HTTP/1.1\n'
                           + headers + '\n' + body)
-
         self.response = self.caller(request_string, handle_errors)
 
     def getresponse(self):


