# # Copyright (c) 2004 The Regents of the University of California. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions, and the following disclaimer. # 2. Redistributions in binary form must reproduce the above # copyright notice, this list of conditions, and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # 3. Redistributions must acknowledge that this software was # originally developed by the UCSF Computer Graphics Laboratory # under support by the NIH National Center for Research Resources, # grant P41-RR01081. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # """ A general-purpose converter between XML objects and Python objects. """ from string import * import xml.parsers.expat from xml.sax.saxutils import escape from types import ListType def _ensureList(x): "Ensure that objects are loaded into lists." if not x: return [] if type(x) != ListType: return [x] return x def _indent(level): "Returns the indentation for a given depth level." return " " * level class _TypeInfo: "A handy class to store callback information about class types." def __init__(self, constructor, super=None): if super: try: sample = constructor(super) except: sample = constructor() else: sample = constructor() self.name = sample.XMLName() self.fieldDict = sample.XMLFields() self.attrDict = sample.XMLAttributes() self.embedDict = sample.XMLEmbedding() self.constructor = constructor self.free_float = not self.fieldDict class Multiloader: """ Multiloader: The General XML Object Loader & Saver Multiloader provides the code required to load objects from XML data files, and write them back into the data files after you're done processing them. These objects need to have certain callbacks, which multiloader uses to identify them and how they are to be structured inside the XML file. Object Format All objects that are read in or out using the Multiloader *must* have the following callback functions present: - 'XMLName()' -- Returns the XML name of the object. - 'XMLFields()' -- Returns a dictionary mapping data members of the object into their associated XML element names. - 'XMLAttributes()' -- Returns a dictionary mapping XML element names into lists of tuples. Each tuple represents an attribute that the related element can be given. Each tuple has as its first element the XML name of the attribute, and as its second element the name of the data member of the object where it can store and look up the attribute data. - 'XMLEmbedding()' -- Returns a dictionary mapping XML object names to constructors, for objects that can be embedded within the object. Each class used by Multiloader, including classes embedded inside other classes through 'XMLEmbedding()', must have all four of these callback functions defined. Embedded objects will be passed a reference to their superobject as the 'super' parameter of their constructors; if this fails, they will be constructed using the default constructors. Furthermore, *all objects used by Multiloader must have empty (or keyword only) constructors*. Loading Once you have created a Multiloader instance, you must load in the data, using the 'load()' function. Once this has been done, you can retrieve the data either by using 'getObjects()', by using 'getLookupTable()', or by doing a search with 'lookup()'. After each object is loaded in, its 'finish()' method will be called, if available. This method should do any type conversions or special gimmicks that need to be done once an object is loaded. For example, numeric data members will probably be loaded in as strings, so this function should convert them back to numbers. Saving In addition to reading data in using a Multiloader, you can also write data out. There are two functions that do this: 'save()' and 'saveList()'. One takes a structured dictionary as a parameter (just like the one returned by 'getObjects()') and the other takes a list of objects. Remember that all of the objects to be written out must have the four callbacks mentioned above defined. If they don't, Multiloader will have no idea how to convert them to XML. Known Issues 1 When determining the information about class types using '_TypeInfo', a sample instance of each class is created, which may be undesirable, particularly if the constructor for the class does anything important, or if something is keeping track of all the instances of the class that are constructed. 2 Won't write out '' or '' for blank elements; instead, does '' and '', which is valid XML but doesn't look as cool. """ # # The next three functions implement the callbacks necessary for the # XML parser. "startElement" is called whenever the parser sees the # beginning of an element (e.g. ), endElement is called whenever # the parser sees the end element (e.g. ), and charData is called # when the parser sees character data. # def _startElement(self, name, attrs): self._curChar = "" # Are we the document element? if not self._docElement: self._docElement = name return # Are we at the start of a top object (not a subobject)? if name in self._objTypes and not self._curObject: # Yes. Construct the object. self._curObject = self._objTypes[name].constructor() self._objInfo = self._objTypes[name] elif self._curObject and name in self._objInfo.embedDict: # No. Instead, we are at the start of a subobject. # Stack the object and construct the subobject, passing it a reference to super. try: newObject = self._objInfo.embedDict[name](super=self._curObject) except: # Whoops! Looks like they don't want a reference to the superobject. newObject = self._objInfo.embedDict[name]() self._stack.append( (self._curObject, self._objInfo) ) self._objInfo = _TypeInfo(self._objInfo.embedDict[name], self._curObject) self._curObject = newObject # Properly store the attributes if self._objInfo and name in self._objInfo.attrDict: for pair in self._objInfo.attrDict[name]: if pair[0] in attrs: setattr(self._curObject, pair[1], attrs[pair[0]]) def _endElement(self, name): # Did we just finish reading the document? if name == self._docElement: # Woohoo! Nothing more to do here. return # Did we just finish reading an object? if name in self._objTypes: # Yes, a top level object. # Fix up the elements (in terms of being lists or not) for key in self._objInfo.fieldDict.keys(): elem = None if hasattr(self._curObject, self._objInfo.fieldDict[key]): elem = getattr(self._curObject, self._objInfo.fieldDict[key]) if not elem: elem = "" elif type(elem) == ListType and len(elem) == 1: elem = elem[0] setattr(self._curObject, self._objInfo.fieldDict[key], elem) # Finish the top level object, and await another. self._objects[name].append(self._curObject) if hasattr(self._curObject, "finish"): self._curObject.finish() if self._lookupField and hasattr(self._curObject, self._lookupField): if getattr(self._curObject, self._lookupField) in self._lookup[name]: self._lookup[name][getattr(self._curObject, self._lookupField)].append(self._curObject) else: self._lookup[name][getattr(self._curObject, self._lookupField)] = [self._curObject] self._curObject = None self._objInfo = None elif self._stack and name in self._stack[-1][1].embedDict: if self._objInfo.free_float: setattr(self._curObject, "_cdata", self._curChar) # Yes, a subobject. # Fix up the elements (in terms of being lists or not) for key in self._objInfo.fieldDict.keys(): elem = None if hasattr(self._curObject, self._objInfo.fieldDict[key]): elem = getattr(self._curObject, self._objInfo.fieldDict[key]) if not elem: elem = "" elif type(elem) == ListType and len(elem) == 1: elem = elem[0] setattr(self._curObject, self._objInfo.fieldDict[key], elem) # Attach this object to its superobject, and return to the superobject. object, self._objInfo = self._stack.pop() if name not in self._objInfo.fieldDict: raise Exception("You forgot to give me an XMLField to store the embedded object in!") elem = [] if not hasattr(object, self._objInfo.fieldDict[name]): elem.append(self._curObject) elif len(getattr(object, self._objInfo.fieldDict[name])) == 0: elem.append(self._curObject) else: elem = _ensureList(getattr(object, self._objInfo.fieldDict[name])) elem.append(self._curObject) if hasattr(self._curObject, "finish"): self._curObject.finish() setattr(object, self._objInfo.fieldDict[name], elem) self._curObject = object elif not self._curObject: # A top level object/element that we don't recognize? Gadzooks! # Well, since this isn't a verifying parser, just ignore it. pass elif not name in self._objInfo.fieldDict: # An object with an element that we were never told about? Gadzooks! # Well, since this isn't a verifying parser, just ignore it. pass else: # No, just an element. elem = [] if not hasattr(self._curObject, self._objInfo.fieldDict[name]): elem.append(self._curChar) elif not getattr(self._curObject, self._objInfo.fieldDict[name]): elem.append(self._curChar) else: elem = _ensureList(getattr(self._curObject, self._objInfo.fieldDict[name])) elem.append(self._curChar) setattr(self._curObject, self._objInfo.fieldDict[name], elem) def _charData(self, data): self._curChar += data def load(self, file, objectConstructors, lookupField=None): """Load the objects from a file. This function instructs the Multiloader to load in the objects from the specified XML file. This function must be passed the filename of the XML data file and a list of constructors for the Python objects. It can also be passed a particular field to build a search dictionary against. This 'lookupField' should be the name of the Python data member, not the XML element name.""" self._lookupField = lookupField self._docElement = '' self._objects = {} self._lookup = {} self._objTypes = {} # For each constructor, discover the information # about that type of object. (XML structure, etc) for constructor in objectConstructors: sample = constructor() objName = sample.XMLName() self._objTypes[objName] = _TypeInfo(constructor) self._lookup[objName] = {} self._objects[objName] = [] # Set the current object to be empty self._curObject = None self._objInfo = None # The object stack, for recursion into embedded objects self._stack = [] # Create parser self.parser = xml.parsers.expat.ParserCreate() # Register callbacks self.parser.StartElementHandler = self._startElement self.parser.EndElementHandler = self._endElement self.parser.CharacterDataHandler = self._charData # Parse self.parser.ParseFile(open(file)) def loadString(self, data, objectConstructors, lookupField=None): """Load the objects from an XML data string. This function instructs the Multiloader to load in the objects from a string storing XML data. Aside from the filename being replaced with the data string, the parameters for this function are the same as those for the regular 'load()' function.""" self._lookupField = lookupField self._docElement = '' self._objects = {} self._lookup = {} self._objTypes = {} # For each constructor, discover the information # about that type of object. (XML structure, etc) for constructor in objectConstructors: sample = constructor() objName = sample.XMLName() self._objTypes[objName] = _TypeInfo(constructor) self._lookup[objName] = {} self._objects[objName] = [] # Set the current object to be empty self._curObject = None self._objInfo = None # The object stack, for recursion into embedded objects self._stack = [] # Create parser self.parser = xml.parsers.expat.ParserCreate() # Register callbacks self.parser.StartElementHandler = self._startElement self.parser.EndElementHandler = self._endElement self.parser.CharacterDataHandler = self._charData # Parse self.parser.Parse(data, 1) def getLookupTable(self): """Get the table used for searches. This function returns the table created for lookups. This table is represented as a dictionary indexed by XML object name; each XML object name is mapped to another dictionary, which maps the element data stored in the object data member 'lookupField' (passed to the Multiloader upon loading) to the objects that contains that exact element data.""" return self._lookup def lookup(self, fieldEntry, inClass=None): """Search through the loaded Python objects. This function searches through the loaded objects for entries that have a particular entry (fieldEntry) in a particular Python data member (the member specified as lookupField to the load function). It returns a dictionary indexed by XML object names, each mapping to a list of objects which have the data 'fieldEntry' in their data member 'lookupField'. When specifying 'inClass' to be the name of a particular class, the function will merely return a list of objects that have the matching entry.""" try: if inClass: return self._lookup[inClass][fieldEntry] else: results = {} for inClass in self._lookup: if fieldEntry in self._lookup[inClass]: results.update({inClass:self._lookup[inClass][fieldEntry]}) return results except: return None def getObjects(self): """Get the loaded Python objects. This function returns a dictionary of lists, indexed by the XML name of the objects inside them. Each list contains all of the objects of that type that were read in by the Multiloader on the top level. Embedded objects are stored properly inside these top level objects.""" return self._objects def save(self, filename, dict): """Save a structured dictionary of Python objects into an XML file. This function converts a structured dictionary of objects into XML, and writes them out to the specified file. The dictionary must be in the same format as the one returned by the getObjects() function. The objects must have the proper callbacks. *Note: This function essentially turns the dictionary into a list and calls saveList(), so if you've got a list you should use saveList() rather than doing any work to structure it.* """ objects = [] for name in dict: objects += dict[name] self.saveList(filename, objects) def saveList(self, filename, objects): """Save a list of Python objects into an XML file. This function converts an arbitrary list of objects into XML, and writes them out to the specified filename. These objects must have the proper callbacks.""" # Open the file outFile = open(filename, "w") # Write out the starting tags outFile.write('\n') outFile.write('\n') # Iterate over the list of objects, recursively outputing each object for obj in objects: self._saveObj(outFile, obj, 1) # Write out the closing tab outFile.write('\n') # Close the file outFile.close def _saveObj(self, outFile, obj, level): # Get all of the lookup fields directly from the object rather # than our stored values. This will be helpful when we recurse to # get encapsulated objects. # Get the top-level object name XMLName = obj.XMLName() # Get the XML Structures fieldDict = obj.XMLFields() attrDict = obj.XMLAttributes() # Get any embedded structures embedList = obj.XMLEmbedding() # Prepare the list of attributes attrs = '' if XMLName in attrDict: for pair in attrDict[XMLName]: if hasattr(obj, pair[1]): attrs += ' ' + pair[0] + '="' + escape(unicode(getattr(obj, pair[1]))) + '"' # Output the top level tag line = _indent(level)+'<'+XMLName+attrs+'>' outFile.write(line.encode('utf-8', 'replace')) if not fieldDict and hasattr(obj, "_cdata"): line = strip(escape(unicode(getattr(obj, "_cdata")))) outFile.write(line.encode('utf-8', 'replace')) outFile.write ('\n') return outFile.write('\n') # Output all of the fields (recursing when necessary) for key in fieldDict.keys(): if hasattr(obj, fieldDict[key]): field = getattr(obj, fieldDict[key]) if type(field) is type([]): for f in field: self._saveElem(outFile, level, key, f, embedList, obj, attrDict) else: self._saveElem(outFile, level, key, field, embedList, obj, attrDict) # Close the top level outFile.write (_indent(level)+'\n') def _saveElem(self, outFile, level, key, field, embedList, obj, attrDict): # is it really an embedded object that needs to be written out in its entirety? if key in embedList: if field: self._saveObj(outFile, field, level+1) else: attrs = '' if key in attrDict: for pair in attrDict[key]: if hasattr(obj, pair[1]): attrs += ' ' + pair[0] + '="' + escape(getattr(obj, pair[1])) + '"' if not field: field = '' field = escape(unicode(field)) line = '%s<%s%s>%s\n' % (_indent(level+1), key, attrs, field, key) outFile.write(line.encode('utf-8', 'replace'))