"""
Common code for the sgmllib, htmllib and xmllib parser drivers.
$Id$
"""
from xml.sax import saxlib,saxutils
import urllib2
# --- LibParser
class LibParser(saxlib.Parser,saxlib.Locator):
    """Common code for the sgmllib, htmllib and xmllib parser drivers.

    Forwards the handle_*/unknown_* callbacks defined here to
    self.doc_handler and reports errors to self.err_handler.

    This class calls reset(), feed(), close() and _can_locate() on itself
    but does not define them; they have to be supplied by a subclass or by
    a parser class combined with this one.
    NOTE(review): doc_handler and err_handler are presumably set up by
    saxlib.Parser.__init__ -- confirm.
    """

    def __init__(self):
        saxlib.Parser.__init__(self)

    def parse(self, sysID):
        """Parses the referenced document.

        sysID is stored on the instance, opened with urllib2.urlopen and
        the resulting stream is handed to parseFile().  The stream is
        closed when parsing finishes, whether it succeeded or raised.
        """
        self.sysID = sysID
        stream = urllib2.urlopen(sysID)
        try:
            self.parseFile(stream)
        finally:
            # Release the connection/file even if parsing raised.
            stream.close()

    def parseFile(self, fileobj):
        """Parses the given file.

        fileobj only needs a read(size) method.  Data is read in chunks of
        16384 and passed to feed() until read() returns an empty result;
        close() is called afterwards.
        """
        if self._can_locate():
            self.doc_handler.setDocumentLocator(self)

        self.reset()
        while True:
            buf = fileobj.read(16384)
            # Any empty result marks end of input, whatever its type.
            if not buf:
                break

            try:
                self.feed(buf)
            except RuntimeError as e:
                # Hand the problem to the error handler; if the handler
                # returns, reading carries on with the next chunk.
                self.err_handler.fatalError(saxlib.SAXException(str(e), e))

        self.close()

    def unknown_endtag(self, tag):
        "Handles end tags."
        self.doc_handler.endElement(tag)

    def handle_xml(self, encoding, standalone):
        """Remembers whether the document is standalone.

        self.standalone becomes true only when standalone is the string
        "yes"; encoding is not used.
        """
        self.standalone = (standalone == "yes")

    def handle_data(self, data):
        "Handles PCDATA."
        # The whole string is reported: start offset 0, length len(data).
        self.doc_handler.characters(data, 0, len(data))

    def handle_cdata(self, data):
        "Handles CDATA marked sections."
        # Reported exactly like ordinary character data.
        self.doc_handler.characters(data, 0, len(data))

    def syntax_error(self, message):
        """Handles fatal errors.

        When location info is available the error is reported as a
        SAXParseException carrying this object as its locator; otherwise a
        plain SAXException is used.
        """
        if self._can_locate():
            error = saxlib.SAXParseException(message, None, self)
        else:
            error = saxlib.SAXException(message, None)
        self.err_handler.fatalError(error)
# --- SGMLParsers
class SGMLParsers(LibParser):
    """Common code for the sgmllib and htmllib parsers.

    Adds the start-tag, processing-instruction and reference callbacks to
    the ones inherited from LibParser, and reports that no location
    information is available.
    """

    def handle_pi(self, data):
        "Handles processing instructions."
        # Should we try to parse out the name if there is one?
        self.doc_handler.processingInstruction("", data)

    def handle_starttag(self, tag, method, attributes):
        """Handles start tags for which a handler method was found.

        method is ignored; the tag is reported through unknown_starttag()
        like every other start tag.
        """
        self.unknown_starttag(tag, attributes)

    def unknown_starttag(self, tag, attributes):
        """Handles start tags.

        attributes is a sequence of (name, value) pairs; when a name occurs
        more than once the last value wins.
        """
        attrs = dict(attributes)
        self.doc_handler.startElement(tag, saxutils.AttributeMap(attrs))

    def handle_endtag(self, tag, method):
        "Handles end tags."
        # method is ignored, mirroring handle_starttag().
        self.doc_handler.endElement(tag)

    def unknown_entityref(self, name):
        "Handles entity references by throwing an error."
        self.err_handler.fatalError(saxlib.SAXException("Reference to unknown entity "
                                                        "'%s'" % name, None))

    def unknown_charref(self, no):
        """Reports an unresolvable character reference as a fatal error.

        no is the reference as received from the parser.  sgmllib documents
        this argument as the reference text (a string), so it is formatted
        with %s: the previous %d raised TypeError for strings instead of
        reporting the error.  Integers produce the same message as before.
        """
        message = "Reference to unknown character '%s'" % (no,)
        self.err_handler.fatalError(saxlib.SAXException(message, None))

    def handle_data(self, data):
        "Handles character data in element content."
        self.doc_handler.characters(data, 0, len(data))

    def report_unbalanced(self, gi):
        "Reports unbalanced tags."
        self.err_handler.fatalError(saxlib.SAXException("Unbalanced end tag for '%s'" % gi, None))

    def _can_locate(self):
        "Internal: returns true if location info is available."
        # Always false here: no locator is set and errors carry no position.
        return 0