[Zope-CVS] CVS: Packages/TestScripts - xml2dict.py:1.1

Chris Withers chrisw@nipltd.com
Thu, 7 Mar 2002 09:45:55 -0500


Update of /cvs-repository/Packages/TestScripts
In directory cvs.zope.org:/tmp/cvs-serv18090

Added Files:
	xml2dict.py 
Log Message:
handy module for parsing XML into a nested structure that smells a bit like a dictionary of dictionaries.
Very useful for parsing xml config files ;-)

=== Added File Packages/TestScripts/xml2dict.py ===
#############################################################
# XML handling
#
# - aDictionary = xml2dict(somexml)
#
#############################################################
from StringIO import StringIO
from xml.sax import make_parser
from xml.sax.handler import feature_namespaces,ContentHandler

class XML2DictError(Exception):
    """Raised by the SAX handler when a closing tag does not match the
    element that is currently open."""

# Sentinel meaning "no default supplied"; it is only ever compared by identity.
_marker = []

class Node:
    """One XML element: its tag name, its text and its child elements.

    Attributes:
        name     -- the name given to the constructor (the element's tag).
        data     -- the text accumulated through addText(), initially ''.
        children -- maps a child name to a tuple of the values stored under
                    that name, in the order they were stored.

    Calling a node returns its text; indexing it by name returns the tuple
    of children stored under that name.
    """

    def __init__(self, name):
        self.name = name
        self.data = ''
        self.children = {}

    def __getitem__(self, name, default=_marker):
        """Return the tuple of children stored under name.

        When called explicitly with a default, that default is returned for
        a missing name; otherwise a missing name raises KeyError.
        """
        value = self.children.get(name, default)
        if value is _marker:
            raise KeyError(name)
        return value

    def __setitem__(self, name, value):
        """Append value to the children stored under name.

        Assignment accumulates rather than replaces, so that repeated tags
        are all kept, in document order.
        """
        children = self.children
        children[name] = children.get(name, ()) + (value,)

    def __delitem__(self, name):
        """Remove every child stored under name; KeyError if there is none."""
        del self.children[name]

    def addText(self, text):
        """Append text to this node's data."""
        self.data += text

    def __call__(self):
        """Return this node's text."""
        return self.data

    def __str__(self):
        return "Node<%s,%s,%s>" % (self.name, repr(self.data), self.children)

    __repr__ = __str__

    def __ne__(self, other):
        """Return true when other differs from this node in data or children.

        The name is not part of the comparison.  An object lacking 'data' or
        'children' attributes is never equal to a Node.  The result is
        returned, never raised: comparison operators must not throw merely
        because the operands differ, and repr() of the two nodes already
        gives a failing test all the detail it needs.
        """
        try:
            return (self.data != other.data
                    or self.children != other.children)
        except AttributeError:
            return 1

    def __eq__(self, other):
        """Return the negation of __ne__."""
        return not self.__ne__(other)
        
# SAX Handler Class
class XML2DictHandler(ContentHandler):
    """SAX content handler that assembles the parsed elements into Nodes.

    Every start tag creates a Node and attaches it to the element that is
    currently open; character data is appended to the innermost open
    element.  Once the outermost element closes, its Node is available as
    the 'dict' attribute.  Element attributes are ignored.
    """

    # --------------------------------------------------------------------------
    # Manage the data structures we build up as we process the file
    # --------------------------------------------------------------------------
    def __init__(self):
        # Let the base class set up its own state before adding ours.
        ContentHandler.__init__(self)
        # Stack of elements whose end tag has not been seen; innermost last.
        self.openElements = []
        # Node of the outermost element; None until that element has closed.
        self.dict = None

    # --------------------------------------------------------------------------
    # sax processing routines
    # --------------------------------------------------------------------------

    def startElement(self, name, attrs):
        """Open a new Node called name and push it on the stack.

        attrs is accepted because the SAX interface passes it, but unused.
        """
        node = Node(name)
        elements = self.openElements
        if elements:
            # Register the new node as a child of the innermost open element.
            elements[-1][name] = node
        elements.append(node)

    def endElement(self, name):
        """Close the innermost open element.

        Raises XML2DictError when name is not the name of that element.
        """
        elements = self.openElements
        element = elements.pop()
        if name != element.name:
            raise XML2DictError(
                '</%s> found when </%s> was expected' % (name, element.name)
            )
        # Store the accumulated text as ISO-8859-1 encoded bytes.
        # NOTE(review): encode() raises UnicodeEncodeError for any character
        # that ISO-8859-1 cannot represent -- confirm that is acceptable.
        element.data = element.data.encode('iso-8859-1')
        if not elements:
            # The stack is empty again, so this was the outermost element.
            self.dict = element

    def characters(self, someText):
        """Append someText to the innermost open element."""
        self.openElements[-1].addText(someText)

def xml2dict(xml):
    """Parse a lump of XML into a dictionary-like tree of nodes.

    xml is the document text.  The value returned is whatever the
    XML2DictHandler instance has recorded in its 'dict' attribute once
    parsing has finished.
    """
    source = StringIO(xml)
    handler = XML2DictHandler()
    parser = make_parser()
    # We are not interested in XML namespaces, so switch that feature off.
    parser.setFeature(feature_namespaces, 0)
    # Route the parser's events to our handler, then run the parse.
    parser.setContentHandler(handler)
    parser.parse(source)
    return handler.dict

###############################################################################
# Tests
###############################################################################
from unittest import TestCase, main, makeSuite

# Fixture for testSimple: one parent element holding two differently named
# children.
simple="""
<tag1>
<tag2>tag2-value</tag2>
<tag3>tag3-value</tag3>
</tag1>
"""
# Fixture for testRepeat: a child tag that occurs twice, plus XML comments
# before and inside the outermost element.
repeat="""
<!-- a comment-->
<tag1>
<!-- another comment -->
<tag2>tag2-value1</tag2>
<tag2>tag2-value2</tag2>
<tag3>tag3-value</tag3>
</tag1>
"""
# Fixture for testComplex: two levels of nesting with repeated tags at both
# levels, indented with spaces.
# NOTE(review): this name shadows the builtin 'complex'; renaming it would
# also require changing the reference in testComplex.
complex="""
<tag1>
 <tag2>
  <tag3>tag3-value1</tag3>
  <tag3>tag3-value2</tag3>
 </tag2>
 <tag2>
  <tag3>tag3-value3</tag3>
  <tag3>tag3-value4</tag3>
  <tag5>value5</tag5>
 </tag2>
</tag1>
"""
class TestXML2Dict(TestCase):
    """Unit tests for Node and for xml2dict, driven by the fixtures above."""

    def _node(self, name, data, children=()):
        """Return a Node called name holding data, with each of children
        stored under its own name, in the order given."""
        built = Node(name)
        built.data = data
        for child in children:
            built[child.name] = child
        return built

    def testNode(self):
        """Exercise text accumulation and child storage on a bare Node."""
        probe = Node('test')

        # Text is appended, and calling the node returns it.
        probe.addText('fish')
        self.assertEqual(probe(), 'fish')
        probe.addText(' finger')
        self.assertEqual(probe(), 'fish finger')

        # Assigning to the same key twice accumulates both values.
        probe['child1'] = 'child1'
        self.assertEqual(probe['child1'], ('child1',))
        probe['child1'] = 'child2'
        self.assertEqual(probe['child1'], ('child1', 'child2'))

        # Deleting the key removes everything stored under it.
        del probe['child1']
        self.assertRaises(KeyError, probe.__getitem__, 'child1')

    def testSimple(self):
        """Test a simple one-level XML packet"""
        expected = self._node('tag1', '\n\n\n', (
            self._node('tag2', 'tag2-value'),
            self._node('tag3', 'tag3-value'),
        ))
        self.assertEqual(xml2dict(simple), expected)

    def testRepeat(self):
        """
        Test a simple one-level XML packet with tag duplicates
        Also tests comments.
        """
        expected = self._node('tag1', '\n\n\n\n\n', (
            self._node('tag2', 'tag2-value1'),
            self._node('tag2', 'tag2-value2'),
            self._node('tag3', 'tag3-value'),
        ))
        self.assertEqual(xml2dict(repeat), expected)

    def testComplex(self):
        """
        Test a more complex packet.
        """
        first = self._node('tag2', '\n  \n  \n ', (
            self._node('tag3', 'tag3-value1'),
            self._node('tag3', 'tag3-value2'),
        ))
        second = self._node('tag2', '\n  \n  \n  \n ', (
            self._node('tag3', 'tag3-value3'),
            self._node('tag3', 'tag3-value4'),
            self._node('tag5', 'value5'),
        ))
        expected = self._node('tag1', '\n \n \n', (first, second))

        self.assertEqual(xml2dict(complex), expected)

def test_suite():
    """Return the suite that makeSuite builds from TestXML2Dict."""
    suite = makeSuite(TestXML2Dict)
    return suite

# Run the tests through unittest's main() when this file is executed as a
# script rather than imported.
if __name__=='__main__':
    main()