[Zope3-checkins] CVS: Zope3/src/zope/app/dublincore - dcterms.py:1.1 xmlmetadata.py:1.1

Fred L. Drake, Jr. fred at zope.com
Wed Aug 20 18:25:15 EDT 2003


Update of /cvs-repository/Zope3/src/zope/app/dublincore
In directory cvs.zope.org:/tmp/cvs-serv31612

Added Files:
	dcterms.py xmlmetadata.py 
Log Message:
New Dublin Core serializer.  Not quite ready to use with filesystem
synchronization, but much of it is done.


=== Added File Zope3/src/zope/app/dublincore/dcterms.py ===
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Support information for qualified Dublin Core metadata.

$Id: dcterms.py,v 1.1 2003/08/20 21:25:15 fdrake Exp $
"""

# Useful namespace URIs.  DC_NS and DCTERMS_NS are the namespaces of the
# "dc:" and "dcterms:" element names used in name_to_element below;
# XSI_NS is the XML Schema instance namespace (used for attributes only,
# never for element names).
DC_NS = "http://purl.org/dc/elements/1.1/"
DCTERMS_NS = "http://purl.org/dc/terms/"
XSI_NS = "http://www.w3.org/2001/XMLSchema-instance"


def splitEncoding(name):
    """Split a trailing encoding name off a metadata element name.

    If the last dot-separated component of `name` is a key of the
    `encodings` table, return ``(name_without_encoding, encoding)``;
    otherwise return ``(name, None)``.

    The result is always a 2-tuple (earlier, a list was returned for
    names made of exactly two components).
    """
    dot = name.rfind(".")
    if dot >= 0:
        # Only the final component can name an encoding; everything
        # before the last dot stays together as the element name.
        base, encoding = name[:dot], name[dot + 1:]
        if encoding in encodings:
            return base, encoding
    return name, None


# The type validator function must raise an exception if the value
# passed isn't valid for the type being checked; otherwise it just returns.

# Lower-cased type name --> canonical spelling; keyed in lower case so
# that check_dcmitype() can match case-insensitively.
_dcmitypes = {}
for x in ("Collection Dataset Event Image InteractiveResource"
          " Service Software Sound Text PhysicalObject").split():
    _dcmitypes[x.lower()] = x
del x

def check_dcmitype(value):
    """Validate `value` as a DCMIType name (case-insensitive).

    Returns None when the value is one of the known type names; raises
    ValueError, naming the offending value, otherwise.
    """
    if value.lower() not in _dcmitypes:
        # The value must be interpolated into the message; without it
        # the error text contained a literal "%r".
        raise ValueError("%r not a valid DCMIType" % (value,))

def check_imt(value):
    """Validator for the "IMT" encoding; a stub that accepts any value."""

def check_iso639_2(value):
    """Validator for the "ISO639-2" encoding; a stub accepting any value."""

def check_rfc1766(value):
    """Validator for the "RFC1766" encoding; a stub accepting any value."""

def check_uri(value):
    """Validator for the "URI" encoding; a stub that accepts any value."""

def check_point(value):
    """Validator for the "Point" encoding; a stub that accepts any value."""

def check_iso3166(value):
    """Validator for the "ISO3166" encoding; a stub accepting any value."""

def check_box(value):
    """Validator for the "Box" encoding; a stub that accepts any value."""

def check_tgn(value):
    """Validator for the "TGN" encoding; a stub that accepts any value."""

def check_period(value):
    """Validator for the "Period" encoding; a stub accepting any value."""

def check_w3cdtf(value):
    """Validator for the "W3CDTF" encoding; a stub accepting any value."""

def check_rfc3066(value):
    """Validator for the "RFC3066" encoding; a stub accepting any value."""

# Table of the known encoding names.  Each key is the name that may
# appear as the trailing component of a metadata name (see
# splitEncoding()); each value is a pair of
#   - a tuple of the metadata element names the encoding is allowed for,
#   - the validator function for values in that encoding, or None when
#     there is no validator.
encodings = {
    # name --> (allowed for, validator|None),
    "LCSH":     (("Subject",), None),
    "MESH":     (("Subject",), None),
    "DDC":      (("Subject",), None),
    "LCC":      (("Subject",), None),
    "UDC":      (("Subject",), None),
    "DCMIType": (("Type",), check_dcmitype),
    "IMT":      (("Format",), check_imt),
    "ISO639-2": (("Language",), check_iso639_2),
    "RFC1766":  (("Language",), check_rfc1766),
    "URI":      (("Identifier", "Relation", "Source",), check_uri),
    "Point":    (("Coverage.Spatial",), check_point),
    "ISO3166":  (("Coverage.Spatial",), check_iso3166),
    "Box":      (("Coverage.Spatial",), check_box),
    "TGN":      (("Coverage.Spatial",), check_tgn),
    "Period":   (("Coverage.Temporal",), check_period),
    "W3CDTF":   (("Coverage.Temporal", "Date",), check_w3cdtf),
    "RFC3066":  (("Language",), check_rfc3066),
    }


# Metadata name --> (qualified XML element name, default encoding).
# The element name uses the "dc:" or "dcterms:" prefix; the default
# encoding is "" when the element has none.
name_to_element = {
    # unqualified DCMES 1.1
    "Title":         ("dc:title",         ""),
    "Creator":       ("dc:creator",       ""),
    "Subject":       ("dc:subject",       ""),
    "Description":   ("dc:description",   ""),
    "Publisher":     ("dc:publisher",     ""),
    "Contributor":   ("dc:contributor",   ""),
    "Date":          ("dc:date",          "dcterms:W3CDTF"),
    "Type":          ("dc:type",          ""),
    "Format":        ("dc:format",        ""),
    "Identifier":    ("dc:identifier",    ""),
    "Source":        ("dc:source",        ""),
    "Language":      ("dc:language",      ""),
    "Relation":      ("dc:relation",      ""),
    "Coverage":      ("dc:coverage",      ""),
    "Rights":        ("dc:rights",        ""),

    # qualified DCMES 1.1 (directly handled by Zope)
    "Date.Created":  ("dcterms:created",  "dcterms:W3CDTF"),
    "Date.Modified": ("dcterms:modified", "dcterms:W3CDTF"),

    # qualified DCMES 1.1 (not used by Zope)
    # ("Format" is not repeated here: it is already listed with the
    # unqualified elements above, and a second identical key was
    # redundant.)
    "Audience":                      ("dcterms:audience", ""),
    "Audience.Education Level":      ("dcterms:educationLevel", ""),
    "Audience.Mediator":             ("dcterms:mediator", ""),
    "Coverage.Spatial":              ("dcterms:spatial", ""),
    "Coverage.Temporal":             ("dcterms:temporal", ""),
    "Date.Accepted":                 ("dcterms:accepted", "dcterms:W3CDTF"),
    "Date.Available":                ("dcterms:available", "dcterms:W3CDTF"),
    "Date.Copyrighted":              ("dcterms:copyrighted", "dcterms:W3CDTF"),
    "Date.Issued":                   ("dcterms:issued", "dcterms:W3CDTF"),
    "Date.Submitted":                ("dcterms:submitted", "dcterms:W3CDTF"),
    "Date.Valid":                    ("dcterms:valid", "dcterms:W3CDTF"),
    "Description.Abstract":          ("dcterms:abstract", ""),
    "Description.Table Of Contents": ("dcterms:tableOfContents", ""),
    "Format.Extent":                 ("dcterms:extent", ""),
    "Format.Medium":                 ("dcterms:medium", ""),
    "Identifier.Bibliographic Citation": ("dcterms:bibliographicCitation", ""),
    "Relation.Is Version Of":        ("dcterms:isVersionOf", ""),
    "Relation.Has Version":          ("dcterms:hasVersion", ""),
    "Relation.Is Replaced By":       ("dcterms:isReplacedBy", ""),
    "Relation.Replaces":             ("dcterms:replaces", ""),
    "Relation.Is Required By":       ("dcterms:isRequiredBy", ""),
    "Relation.Requires":             ("dcterms:requires", ""),
    "Relation.Is Part Of":           ("dcterms:isPartOf", ""),
    "Relation.Has Part":             ("dcterms:hasPart", ""),
    "Relation.Is Referenced By":     ("dcterms:isReferencedBy", ""),
    "Relation.References":           ("dcterms:references", ""),
    "Relation.Is Format Of":         ("dcterms:isFormatOf", ""),
    "Relation.Has Format":           ("dcterms:hasFormat", ""),
    "Relation.Conforms To":          ("dcterms:conformsTo", ""),
    "Rights.Access Rights":          ("dcterms:accessRights", ""),
    "Title.Alternative":             ("dcterms:alternative", ""),
    }

# Namespace prefix --> namespace URI, for the prefixes that appear in
# the qualified element names of name_to_element.
_prefix_to_ns = {
    "dc": DC_NS,
    "dcterms": DCTERMS_NS,
    # "xsi": XSI_NS,    don't use this for element names, only attrs
    }

# Reverse lookup built from name_to_element:
#   (namespace URI, local name) --> metadata name
# The default encoding stored in name_to_element is ignored here.
# NOTE: the loop variables below are not deleted afterwards, so they
# remain defined as module-level names.
element_to_name = {}
for name, (qname, unused) in name_to_element.iteritems():
    prefix, localname = qname.split(":")
    elem_name = _prefix_to_ns[prefix], localname
    element_to_name[elem_name] = name


=== Added File Zope3/src/zope/app/dublincore/xmlmetadata.py ===
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
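"""Serialize Dublin Core metadata to XML and parse it back.

dumpString() writes a mapping of metadata names to sequences of values
as an XML document; parse() and parseString() read such a document with
a namespace-aware SAX parser and return the corresponding mapping.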

$Id: xmlmetadata.py,v 1.1 2003/08/20 21:25:15 fdrake Exp $
"""

import xml.sax
import xml.sax.handler

from cStringIO import StringIO
from xml.sax.saxutils import quoteattr

from zope.app.dublincore import dcterms


# (namespace URI, local name) key used to look up the xsi:type attribute
# in the attributes passed to startElementNS().
XSI_TYPE = (dcterms.XSI_NS, "type")

# Namespaces whose elements are treated as Dublin Core containers by
# DublinCoreHandler.get_dc_container().
dublin_core_namespaces = dcterms.DC_NS, dcterms.DCTERMS_NS


def dumpString(mapping):
    """Serialize Dublin Core metadata to an XML document string.

    `mapping` maps metadata names to sequences of string values.  A name
    is a key of dcterms.name_to_element, optionally followed by
    ".<encoding>" where <encoding> is a key of dcterms.encodings.

    Entries are written in sorted name order, with a blank line between
    groups of names that share the same first dot-separated component.
    An xsi:type attribute is emitted when the name carries an encoding
    or the element has a default encoding.

    Raises RuntimeError if a name has no entry in
    dcterms.name_to_element.
    """
    # Imported here because the module itself only imports quoteattr.
    from xml.sax.saxutils import escape

    sio = StringIO()
    sio.write("<?xml version='1.0' encoding='utf-8'?>\n"
              "<metadata\n"
              "  xmlns:dc='http://purl.org/dc/elements/1.1/'\n"
              "  xmlns:dcterms='http://purl.org/dc/terms/'\n"
              "  xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>\n")
    # Copy into a list before sorting so this does not rely on items()
    # returning a sortable list.
    items = list(mapping.items())
    items.sort()
    prev = None
    for name, values in items:
        name, encoding = dcterms.splitEncoding(name)
        group = name.split(".", 1)[0]
        if prev != group:
            sio.write("\n")
            prev = group
        # The table lives in the dcterms module; the bare name
        # `name_to_element` is not defined in this module.
        if name not in dcterms.name_to_element:
            raise RuntimeError("could not serialize %r metadata element"
                               % name)
        element, default_encoding = dcterms.name_to_element[name]
        if not encoding:
            encoding = default_encoding
        if encoding:
            xsi_type = " xsi:type=" + quoteattr(encoding)
        else:
            xsi_type = ""
        for value in values:
            # Escape markup characters so that values containing
            # "&", "<" or ">" still yield well-formed XML.
            sio.write("  <%s%s>\n    %s\n  </%s>\n"
                      % (element, xsi_type, escape(value), element))
    sio.write("</metadata>\n")
    return sio.getvalue()


def parse(source, error_handler=None):
    """Parse Dublin Core XML from `source` and return the metadata mapping.

    `source` is handed unchanged to the SAX parser's parse() method.
    `error_handler`, when not None, is installed as the parser's error
    handler.
    """
    sax_parser, handler = _setup_parser(error_handler)
    sax_parser.parse(source)
    return handler.mapping

def parseString(text, error_handler=None):
    """Parse Dublin Core XML held in `text` and return the metadata mapping.

    The text is fed to the SAX parser in a single chunk and the parser
    is then closed.  `error_handler`, when not None, is installed as the
    parser's error handler.
    """
    sax_parser, handler = _setup_parser(error_handler)
    sax_parser.feed(text)
    sax_parser.close()
    return handler.mapping

def _setup_parser(error_handler):
    """Create a namespace-aware SAX parser wired to a DublinCoreHandler.

    Returns the pair (parser, content handler).  `error_handler` is
    installed on the parser unless it is None.
    """
    sax_parser = xml.sax.make_parser()
    handler = DublinCoreHandler()
    # The handler only implements the *NS callbacks, so namespace
    # processing has to be switched on.
    sax_parser.setFeature(xml.sax.handler.feature_namespaces, True)
    sax_parser.setContentHandler(handler)
    if error_handler is None:
        return sax_parser, handler
    sax_parser.setErrorHandler(error_handler)
    return sax_parser, handler


class PrefixManager:
    # Only DublinCoreHandler uses this at the moment, but nothing in it
    # is specific to Dublin Core, so it is kept as a separate class.

    """Tracks the namespace URI currently bound to each prefix.

    Intended to be mixed into a ContentHandler: it supplies the
    startPrefixMapping()/endPrefixMapping() callbacks and adds get_uri()
    for looking up the innermost binding of a prefix.
    """

    # prefix --> list of URIs, innermost binding last; created lazily on
    # the instance the first time a mapping starts.
    __prefix_map = None

    def startPrefixMapping(self, prefix, uri):
        """Record that `prefix` is now bound to `uri`."""
        bindings = self.__prefix_map
        if bindings is None:
            bindings = self.__prefix_map = {}
        uris = bindings.get(prefix)
        if uris is None:
            uris = bindings[prefix] = []
        uris.append(uri)

    def endPrefixMapping(self, prefix):
        """Drop the innermost binding of `prefix`."""
        bindings = self.__prefix_map
        uris = bindings[prefix]
        uris.pop()
        if len(uris) == 0:
            # no outer binding left for this prefix
            del bindings[prefix]

    def get_uri(self, prefix):
        """Return the URI currently bound to `prefix`, or None if unbound."""
        bindings = self.__prefix_map
        if bindings is None or prefix not in bindings:
            return None
        return bindings[prefix][-1]


class DublinCoreHandler(PrefixManager, xml.sax.handler.ContentHandler):
    """SAX content handler that collects Dublin Core elements in a mapping.

    After parsing, `mapping` maps metadata names to tuples of the
    (whitespace-stripped) text values found, in document order.  A
    metadata name is the dcterms.element_to_name entry for the element,
    with ".<encoding>" appended when the element carried an xsi:type
    attribute.

    Only the namespace-aware callbacks (startElementNS/endElementNS) are
    implemented, so the parser must have namespace processing enabled.
    """

    def startDocument(self):
        """Reset the collected mapping and the open-element stack."""
        self.mapping = {}
        # One (element name, metadata name or None, validator or None)
        # entry per currently open element.
        self.stack = []
        # Text collected for the most recently opened element; None
        # while no element text is being collected.
        self.buffer = None

    def get_dc_container(self):
        """Return the metadata name of the innermost open DC element.

        Returns None when no open element belongs to one of the Dublin
        Core namespaces, or when that element is not a known one.
        """
        name = None
        for (uri, localname), dcelem, validator in self.stack:
            if uri in dublin_core_namespaces:
                name = uri, localname
        if name in dcterms.element_to_name:
            # dcelem contains type info, so go back to the mapping
            return dcterms.element_to_name[name]
        else:
            return None

    def startElementNS(self, name, qname, attrs):
        """Identify the element, check its xsi:type, and push it on the stack.

        Raises ValueError when an xsi:type is given for an unknown
        element, names an unknown namespace or encoding, is not allowed
        for the element, or when the element is nested inside a Dublin
        Core element it does not refine.
        """
        # Starting any element restarts text collection, so text that
        # precedes a child element is discarded.
        self.buffer = u""
        dcelem = validator = None
        if name in dcterms.element_to_name:
            dcelem = dcterms.element_to_name[name]
        encoding = attrs.get(XSI_TYPE)
        if encoding:
            if not dcelem:
                raise ValueError(
                    "data type specified for unknown metadata element: %s"
                    % qname)
            if ":" in encoding:
                prefix, local = encoding.split(":", 1)
                ns = self.get_uri(prefix)
                if ns != dcterms.DCTERMS_NS:
                    raise ValueError("unknown data type namespace: %s"
                                     % local)
                encoding = local
            if encoding not in dcterms.encodings:
                raise ValueError("unknown data type: %r" % encoding)
            allowed_in, validator = dcterms.encodings[encoding]
            # An encoding allowed for an element is also allowed for the
            # refinements of that element (e.g. W3CDTF is listed for
            # "Date" and must be accepted for "Date.Created"); an exact
            # membership test would reject the xsi:type that
            # dcterms.name_to_element gives those refinements by default.
            for allowed in allowed_in:
                if dcelem == allowed or dcelem.startswith(allowed + "."):
                    break
            else:
                raise ValueError("%s values are not allowed for %r"
                                 % (encoding, dcelem))
            dcelem = "%s.%s" % (dcelem, encoding)
        if dcelem:
            cont = self.get_dc_container()
            if cont and cont != dcelem:
                prefix = cont + "."
                if not dcelem.startswith(prefix):
                    raise ValueError("%s is not a valid refinement for %s"
                                     % (dcelem, cont))
        self.stack.append((name, dcelem, validator))

    def endElementNS(self, name, qname):
        """Validate and record the text of the element being closed."""
        startname, dcelem, validator = self.stack.pop()
        assert startname == name
        if self.buffer is None:
            # A child element was already closed since this element
            # started; its remaining text is not recorded.
            return
        data = self.buffer.strip()
        self.buffer = None
        if not dcelem:
            return
        if validator is not None:
            validator(data)
        if dcelem in self.mapping:
            self.mapping[dcelem] += (data,)
        else:
            self.mapping[dcelem] = (data,)

    def characters(self, data):
        """Append character data to the current element's text, if any."""
        if self.buffer is not None:
            self.buffer += data




More information about the Zope3-Checkins mailing list