diff options
Diffstat (limited to 'src/webdav/qp_xml.py')
-rw-r--r-- | src/webdav/qp_xml.py | 240 |
1 files changed, 240 insertions, 0 deletions
# pylint: disable-msg=W0311,E1101,E1103,W0201,C0103,W0622,W0402,W0706,R0911,W0613,W0612,R0912,W0141,C0111,C0121

# qp_xml: Quick Parsing for XML
#
# Written by Greg Stein. Public Domain.
# No Copyright, no Rights Reserved, and no Warranties.
#
# This module is maintained by Greg and is available as part of the XML-SIG
# distribution. This module and its changelog can be fetched at:
#    http://www.lyra.org/cgi-bin/viewcvs.cgi/xml/xml/utils/qp_xml.py
#
# Additional information can be found on Greg's Python page at:
#    http://www.lyra.org/greg/python/
#
# This module was added to the XML-SIG distribution on February 14, 2000.
# As part of that distribution, it falls under the XML distribution license.
#
"""Quick Parsing for XML: build a small namespace-aware element tree.

Typical use::

    root = Parser().parse(text_or_file)
    value = textof(root.find('child', 'urn:some-namespace'))
"""

import string  # no longer used here; kept so the module namespace is unchanged
from xml.parsers import expat
from xml.sax.saxutils import escape as _escape


class error(Exception):
    """Raised when an element or attribute uses an undeclared namespace prefix.

    This used to be a plain string exception; it is a real exception class
    now because raising strings is rejected by every current interpreter.
    ``except error:`` in calling code keeps working.
    """


#
# The parsing class. Instantiate and pass a string/file to .parse()
#
class Parser:
    """Expat-driven parser producing a tree of ``_element`` objects.

    Each element carries ``name``, ``ns`` (namespace URI, '' for none),
    ``lang`` (inherited xml:lang), ``attrs`` (a ``{(ns, name): value}``
    dict), ``children``, ``first_cdata`` (text before the first child) and
    ``following_cdata`` (text following the element inside its parent).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget any previously parsed tree."""
        self.root = None
        self.cur_elem = None

    def find_prefix(self, prefix):
        """Return the namespace URI bound to *prefix* in the current scope.

        Walks from the element being parsed up through its open ancestors.
        Returns '' for the empty prefix when no default namespace is
        declared, and None for any other unbound prefix.
        """
        elem = self.cur_elem
        while elem is not None:
            if prefix in elem.ns_scope:
                return elem.ns_scope[prefix]
            elem = elem.parent

        if prefix == '':
            return ''    # empty URL for "no namespace"

        return None

    def process_prefix(self, name, use_default):
        """Split a qualified *name* into a ``(namespace, local_name)`` pair.

        *use_default* selects whether an unprefixed name picks up the
        default namespace (true for elements, false for attributes).
        Raises ``error`` when the prefix is not declared.
        """
        idx = name.find(':')
        if idx == -1:
            if use_default:
                return self.find_prefix(''), name
            return '', name    # no namespace

        if name[:3].lower() == 'xml':
            return '', name    # name is reserved by XML. don't break out a NS.

        prefix = name[:idx]
        ns = self.find_prefix(prefix)
        if ns is None:
            raise error('namespace prefix ("%s") not found' % prefix)

        return ns, name[idx + 1:]

    def start(self, name, attrs):
        """Expat StartElementHandler: open a new element under the current one."""
        elem = _element(name=name, lang=None, parent=None,
                        children=[], ns_scope={}, attrs={},
                        first_cdata='', following_cdata='')

        if self.cur_elem is not None:
            elem.parent = self.cur_elem
            elem.parent.children.append(elem)
            self.cur_elem = elem
        else:
            self.cur_elem = self.root = elem

        work_attrs = []

        # scan for namespace declarations (and xml:lang while we're at it)
        for attr_name, value in attrs.items():
            if attr_name == 'xmlns':
                elem.ns_scope[''] = value
            elif attr_name[:6] == 'xmlns:':
                elem.ns_scope[attr_name[6:]] = value
            elif attr_name == 'xml:lang':
                elem.lang = value
            else:
                work_attrs.append((attr_name, value))

        # inherit xml:lang from parent
        if elem.lang is None and elem.parent is not None:
            elem.lang = elem.parent.lang

        # process prefix of the element name; this must happen after the
        # declarations above were recorded, since they may bind its prefix
        elem.ns, elem.name = self.process_prefix(elem.name, 1)

        # process attributes' namespace prefixes
        for attr_name, value in work_attrs:
            elem.attrs[self.process_prefix(attr_name, 0)] = value

    def end(self, name):
        """Expat EndElementHandler: close the current element."""
        finished = self.cur_elem
        parent = finished.parent

        # parse-time bookkeeping is not part of the finished tree
        del finished.ns_scope
        del finished.parent

        self.cur_elem = parent

    def cdata(self, data):
        """Expat CharacterDataHandler: attach *data* to the right text slot."""
        elem = self.cur_elem
        if elem.children:
            # text after a child belongs to that child's following_cdata
            last = elem.children[-1]
            last.following_cdata = last.following_cdata + data
        else:
            elem.first_cdata = elem.first_cdata + data

    def parse(self, input):
        """Parse *input* and return the root element.

        *input* is either a string holding the whole document or a
        file-like object with a ``read(size)`` method, which is consumed in
        ``_BLOCKSIZE`` chunks. Expat errors and ``error`` propagate to the
        caller; the partially built tree is still cleaned up first.
        """
        self.reset()

        p = expat.ParserCreate()
        p.StartElementHandler = self.start
        p.EndElementHandler = self.end
        p.CharacterDataHandler = self.cdata

        try:
            if hasattr(input, 'read'):
                while True:
                    chunk = input.read(_BLOCKSIZE)
                    if not chunk:
                        p.Parse('', 1)
                        break
                    p.Parse(chunk, 0)
            else:
                p.Parse(input, 1)
        finally:
            if self.root is not None:
                _clean_tree(self.root)

        return self.root


#
# handy function for dumping a tree that is returned by Parser
#
def dump(f, root):
    """Write the tree rooted at *root* to file-like *f* as an XML document.

    Namespaces are re-declared on the root element with generated
    ``nsN`` prefixes. Text and attribute values are escaped.
    """
    f.write('<?xml version="1.0"?>\n')
    namespaces = _collect_ns(root)
    _dump_recurse(f, root, namespaces, dump_ns=1)
    f.write('\n')


#
# This function returns the element's CDATA. Note: this is not recursive --
# it only returns the CDATA immediately within the element, excluding the
# CDATA in child elements.
#
def textof(elem):
    """Return the character data directly inside *elem* (not recursive)."""
    return elem.textof()


#########################################################################
#
# private stuff for qp_xml
#

_BLOCKSIZE = 16384    # chunk size for parsing input

# characters that need escaping inside a double-quoted attribute value,
# beyond the &, < and > that escape() always handles
_ATTR_ENTITIES = {'"': '&quot;', '\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}


def _escape_attr(value):
    "Escape *value* for use between double quotes in an attribute."
    return _escape(value, _ATTR_ENTITIES)


class _element:
    """A node of the parsed tree; attributes are supplied as keywords."""

    def __init__(self, **kw):
        self.__dict__.update(kw)

    def textof(self):
        '''Return the CDATA of this element.

        Note: this is not recursive -- it only returns the CDATA immediately
        within the element, excluding the CDATA in child elements.
        '''
        pieces = [self.first_cdata]
        pieces.extend([child.following_cdata for child in self.children])
        return ''.join(pieces)

    def find(self, name, ns=''):
        """Return the first direct child named *name* in namespace *ns*.

        Returns None when there is no such child.
        """
        for elem in self.children:
            if elem.name == name and elem.ns == ns:
                return elem
        return None


def _clean_tree(elem):
    "Drop the parent back-references from *elem* and all its descendants."
    # assign first so the del cannot fail on elements that end() already
    # stripped
    elem.parent = None
    del elem.parent
    for child in elem.children:
        _clean_tree(child)


def _collect_recurse(elem, seen):
    "Record every namespace used below *elem* as a key of *seen*."
    seen[elem.ns] = None
    for ns, _name in elem.attrs.keys():
        seen[ns] = None
    for child in elem.children:
        _collect_recurse(child, seen)


def _collect_ns(elem):
    "Collect all namespaces into a NAMESPACE -> PREFIX mapping."
    d = {'': None}
    _collect_recurse(elem, d)
    del d['']    # make sure we don't pick up no-namespace entries
    for index, ns in enumerate(list(d)):
        d[ns] = index
    return d


def _dump_recurse(f, elem, namespaces, lang=None, dump_ns=0):
    """Write *elem* and its subtree to *f*.

    *namespaces* maps namespace URI to the numeric suffix of its ``nsN``
    prefix, *lang* is the xml:lang in effect in the parent, and *dump_ns*
    requests the xmlns declarations (used for the root element only).
    """
    if elem.ns:
        tag = 'ns%d:%s' % (namespaces[elem.ns], elem.name)
    else:
        tag = elem.name
    f.write('<' + tag)

    for (ns, name), value in elem.attrs.items():
        if ns:
            f.write(' ns%d:%s="%s"'
                    % (namespaces[ns], name, _escape_attr(value)))
        else:
            f.write(' %s="%s"' % (name, _escape_attr(value)))

    if dump_ns:
        for uri, index in namespaces.items():
            f.write(' xmlns:ns%d="%s"' % (index, _escape_attr(uri)))

    if elem.lang != lang:
        # an element without a language under a parent that has one can only
        # come from a hand-built tree; xml:lang="" is how XML spells that
        f.write(' xml:lang="%s"' % _escape_attr(elem.lang or ''))

    if elem.children or elem.first_cdata:
        f.write('>' + _escape(elem.first_cdata))
        for child in elem.children:
            _dump_recurse(f, child, namespaces, elem.lang)
            f.write(_escape(child.following_cdata))
        f.write('</%s>' % tag)
    else:
        f.write('/>')