[Zope3-checkins] CVS: Zope3/src/zope/app/xml - w3cschemalocations.py:1.1

Martijn Faassen m.faassen@vet.uu.nl
Thu, 10 Apr 2003 06:33:32 -0400


Update of /cvs-repository/Zope3/src/zope/app/xml
In directory cvs.zope.org:/tmp/cvs-serv28331

Added Files:
	w3cschemalocations.py 
Log Message:
Added a module that can extract schema locations of W3C XML Schemas
from XML documents.  


=== Added File Zope3/src/zope/app/xml/w3cschemalocations.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
This module contains a few utilities to extract information from XML text.

$Id: w3cschemalocations.py,v 1.1 2003/04/10 10:33:29 faassen Exp $
"""
from xml.parsers.expat import ParserCreate, ExpatError

def getW3CXMLSchemaLocations(xml):
    """Return the schema locations declared on an XML document's root element.

    xml -- the XML document text to inspect.

    The values come from the xsi:schemaLocation attribute of the document
    element: the result is the list of whitespace-separated entries of that
    attribute.  A document that cannot be parsed up to its document element,
    or whose document element lacks the attribute, yields an empty list.
    """
    location_parser = W3CXMLSchemaLocationParser(xml)
    # parse() stores its findings on the parser object and returns nothing.
    location_parser.parse()
    return location_parser.getSchemaLocations()

class DoneParsing(Exception):
    """Raised by the start-element handler to abort parsing early.

    It signals that the document element has been examined, not that an
    error occurred; W3CXMLSchemaLocationParser.parse() catches it.
    """

class W3CXMLSchemaLocationParser:
    """Extract the xsi:schemaLocation entries from an XML document element.

    The document is fed to an expat parser created with namespace
    processing enabled.  Only the first start tag (the document element)
    is inspected; parsing is then aborted by raising DoneParsing, so the
    remainder of the document is never processed.

    Typical use: create the parser with the XML text, call parse() and
    read the result with getSchemaLocations().
    """

    SCHEMA_INSTANCE_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance'

    def __init__(self, xml):
        """Prepare a parser for the given XML text; nothing is parsed yet.

        xml -- the XML document text to inspect.
        """
        self._xml = xml
        self._schema_uris = []
        # With a namespace separator set, expat reports a qualified name as
        # "<namespace URI><separator><local name>"; startElement relies on
        # the separator being a single space.
        self._parser = ParserCreate(namespace_separator=" ")
        self._parser.StartElementHandler = self.startElement

    def startElement(self, name, attrs):
        """Expat start-element handler: record schema locations, then stop.

        name -- element name as reported by expat (not used).
        attrs -- mapping of attribute names to attribute values.

        Always raises DoneParsing, so that only the first element of the
        document (the document element) is ever examined.
        """
        # A namespaced attribute arrives keyed as "<namespace URI> <local
        # name>", so the attribute of interest can be looked up directly
        # instead of splitting every attribute name.
        schema_location = attrs.get(
            self.SCHEMA_INSTANCE_NAMESPACE + ' schemaLocation')
        if schema_location is not None:
            # split() without arguments also discards leading and trailing
            # whitespace, so no separate strip() is needed.
            self._schema_uris = schema_location.split()
        # abort parsing after the first element, which is the document element
        # raising an error seems to be a legitimate way to do this
        raise DoneParsing

    def parse(self):
        """Parse the XML text far enough to read the document element.

        Returns nothing; the result is available from getSchemaLocations().
        Expat errors are swallowed on purpose, see below.
        """
        try:
            self._parser.Parse(self._xml, True)
        except ExpatError:
            # we do not take any special pains to make sure this is
            # well-formed anyway; this should happen at a higher level
            # (views) or will be detected at a lower layer (parsing into
            # a DOM or SAX events) anyway.
            pass
        except DoneParsing:
            # Normal exit: startElement stopped the parse after the
            # document element.
            pass

    def getSchemaLocations(self):
        """Return the list of whitespace-separated xsi:schemaLocation entries.

        The list is empty until parse() has been called, and stays empty if
        the document element carries no such attribute.
        """
        return self._schema_uris