Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py')
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py579
1 files changed, 579 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
new file mode 100644
index 0000000..00e8d6a
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
@@ -0,0 +1,579 @@
+# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Daniel Krech nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+"""
+from xml.sax import make_parser
+from xml.sax.handler import ErrorHandler
+from xml.sax.saxutils import handler, quoteattr, escape
+from urlparse import urljoin, urldefrag
+
+from rdflib.namespace import RDF, is_ncname
+from rdflib.term import URIRef
+from rdflib.term import BNode
+from rdflib.term import Literal
+from rdflib.exceptions import ParserError, Error
+from rdflib.parser import Parser
+
+__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser']
+
+RDFNS = RDF
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
+# A mapping from unqualified terms to there qualified version.
+UNQUALIFIED = {"about" : RDF.about,
+ "ID" : RDF.ID,
+ "type" : RDF.type,
+ "resource": RDF.resource,
+ "parseType": RDF.parseType}
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
+CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
+SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
+OLD_TERMS = [
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
+
+NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
+NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
+
+PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
+PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
+PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
+
+XMLNS = "http://www.w3.org/XML/1998/namespace"
+BASE = (XMLNS, "base")
+LANG = (XMLNS, "lang")
+
+
+class BagID(URIRef):
+ __slots__ = ['li']
+ def __init__(self, val):
+ super(URIRef, self).__init__(val)
+ self.li = 0
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class ElementHandler(object):
+ __slots__ = ['start', 'char', 'end', 'li', 'id',
+ 'base', 'subject', 'predicate', 'object',
+ 'list', 'language', 'datatype', 'declared', 'data']
+ def __init__(self):
+ self.start = None
+ self.char = None
+ self.end = None
+ self.li = 0
+ self.id = None
+ self.base = None
+ self.subject = None
+ self.object = None
+ self.list = None
+ self.language = None
+ self.datatype = None
+ self.declared = None
+ self.data = None
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class RDFXMLHandler(handler.ContentHandler):
+
+ def __init__(self, store):
+ self.store = store
+ self.preserve_bnode_ids = False
+ self.reset()
+
+ def reset(self):
+ document_element = ElementHandler()
+ document_element.start = self.document_element_start
+ document_element.end = lambda name, qname: None
+ self.stack = [None, document_element,]
+ self.ids = {} # remember IDs we have already seen
+ self.bnode = {}
+ self._ns_contexts = [{}] # contains uri -> prefix dicts
+ self._current_context = self._ns_contexts[-1]
+
+ # ContentHandler methods
+
+ def setDocumentLocator(self, locator):
+ self.locator = locator
+
+ def startDocument(self):
+ pass
+
+ def startPrefixMapping(self, prefix, namespace):
+ self._ns_contexts.append(self._current_context.copy())
+ self._current_context[namespace] = prefix
+ self.store.bind(prefix, URIRef(namespace), override=False)
+
+ def endPrefixMapping(self, prefix):
+ self._current_context = self._ns_contexts[-1]
+ del self._ns_contexts[-1]
+
+ def startElementNS(self, name, qname, attrs):
+ stack = self.stack
+ stack.append(ElementHandler())
+ current = self.current
+ parent = self.parent
+ base = attrs.get(BASE, None)
+ if base is not None:
+ base, frag = urldefrag(base)
+ if parent and parent.base:
+ base = urljoin(parent.base, base)
+ else:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base = urljoin(systemId, base)
+ else:
+ if parent:
+ base = parent.base
+ if base is None:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base, frag = urldefrag(systemId)
+ current.base = base
+ language = attrs.get(LANG, None)
+ if language is None:
+ if parent:
+ language = parent.language
+ current.language = language
+ current.start(name, qname, attrs)
+
+ def endElementNS(self, name, qname):
+ self.current.end(name, qname)
+ self.stack.pop()
+
+ def characters(self, content):
+ char = self.current.char
+ if char:
+ char(content)
+
+ def ignorableWhitespace(self, content):
+ pass
+
+ def processingInstruction(self, target, data):
+ pass
+
+ def add_reified(self, sid, (s, p, o)):
+ self.store.add((sid, RDF.type, RDF.Statement))
+ self.store.add((sid, RDF.subject, s))
+ self.store.add((sid, RDF.predicate, p))
+ self.store.add((sid, RDF.object, o))
+
+ def error(self, message):
+ locator = self.locator
+ info = "%s:%s:%s: " % (locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber())
+ raise ParserError(info + message)
+
+ def get_current(self):
+ return self.stack[-2]
+ # Create a read only property called current so that self.current
+ # give the current element handler.
+ current = property(get_current)
+
+ def get_next(self):
+ return self.stack[-1]
+ # Create a read only property that gives the element handler to be
+ # used for the next element.
+ next = property(get_next)
+
+ def get_parent(self):
+ return self.stack[-3]
+ # Create a read only property that gives the current parent
+ # element handler
+ parent = property(get_parent)
+
+ def absolutize(self, uri):
+ result = urljoin(self.current.base, uri, allow_fragments=1)
+ if uri and uri[-1]=="#" and result[-1]!="#":
+ result = "%s#" % result
+ return URIRef(result)
+
+ def convert(self, name, qname, attrs):
+ if name[0] is None:
+ name = URIRef(name[1])
+ else:
+ name = URIRef("".join(name))
+ atts = {}
+ for (n, v) in attrs.items(): #attrs._attrs.iteritems(): #
+ if n[0] is None:
+ att = URIRef(n[1])
+ else:
+ att = URIRef("".join(n))
+ if att.startswith(XMLNS) or att[0:3].lower()=="xml":
+ pass
+ elif att in UNQUALIFIED:
+ #if not RDFNS[att] in atts:
+ atts[RDFNS[att]] = v
+ else:
+ atts[URIRef(att)] = v
+ return name, atts
+
+ def document_element_start(self, name, qname, attrs):
+ if name[0] and URIRef("".join(name)) == RDF.RDF:
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ self.node_element_start(name, qname, attrs)
+ #self.current.end = self.node_element_end
+ # TODO... set end to something that sets start such that
+ # another element will cause error
+
+
+ def node_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+
+ if name in NODE_ELEMENT_EXCEPTIONS:
+ self.error("Invalid node element URI: %s" % name)
+
+ if RDF.ID in atts:
+ if RDF.about in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+
+ id = atts[RDF.ID]
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a valid NCName: %s" % id)
+ subject = absolutize("#%s" % id)
+ if subject in self.ids:
+ self.error("two elements cannot use the same ID: '%s'" % subject)
+ self.ids[subject] = 1 # IDs can only appear once within a document
+ elif RDF.nodeID in atts:
+ if RDF.ID in atts or RDF.about in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ nodeID = atts[RDF.nodeID]
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ subject = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ else:
+ subject = BNode(nodeID)
+ elif RDF.about in atts:
+ if RDF.ID in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ subject = absolutize(atts[RDF.about])
+ else:
+ subject = BNode()
+
+ if name!=RDF.Description: # S1
+ self.store.add((subject, RDF.type, absolutize(name)))
+
+ language = current.language
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ elif att==RDF.type: #S2
+ predicate = RDF.type
+ object = absolutize(atts[RDF.type])
+ elif att in NODE_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
+ self.error("Invalid property attribute URI: %s" % att)
+ continue # for when error does not throw an exception
+ else:
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ self.store.add((subject, predicate, object))
+
+ current.subject = subject
+
+
+ def node_element_end(self, name, qname):
+ self.parent.object = self.current.subject
+
+ def property_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ object = None
+ current.data = None
+ current.list = None
+
+ if not name.startswith(str(RDFNS)):
+ current.predicate = absolutize(name)
+ elif name==RDF.li:
+ current.predicate = current.next_li()
+ elif name in PROPERTY_ELEMENT_EXCEPTIONS:
+ self.error("Invalid property element URI: %s" % name)
+ else:
+ current.predicate = absolutize(name)
+
+ id = atts.get(RDF.ID, None)
+ if id is not None:
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a value NCName: %s" % id)
+ current.id = absolutize("#%s" % id)
+ else:
+ current.id = None
+
+ resource = atts.get(RDF.resource, None)
+ nodeID = atts.get(RDF.nodeID, None)
+ parse_type = atts.get(RDF.parseType, None)
+ if resource is not None and nodeID is not None:
+ self.error("Property element cannot have both rdf:nodeID and rdf:resource")
+ if resource is not None:
+ object = absolutize(resource)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ elif nodeID is not None:
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ object = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ object = subject
+ else:
+ object = subject = BNode(nodeID)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ if parse_type is not None:
+ for att in atts:
+ if att!=RDF.parseType and att!=RDF.ID:
+ self.error("Property attr '%s' now allowed here" % att)
+ if parse_type=="Resource":
+ current.subject = object = BNode()
+ current.char = self.property_element_char
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+ elif parse_type=="Collection":
+ current.char = None
+ object = current.list = RDF.nil #BNode()#self.parent.subject
+ next.start = self.node_element_start
+ next.end = self.list_node_element_end
+ else: #if parse_type=="Literal":
+ # All other values are treated as Literal
+ # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
+ object = Literal("", datatype=RDF.XMLLiteral)
+ current.char = self.literal_element_char
+ current.declared = {}
+ next.start = self.literal_element_start
+ next.char = self.literal_element_char
+ next.end = self.literal_element_end
+ current.object = object
+ return
+ else:
+ object = None
+ current.char = self.property_element_char
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+
+ datatype = current.datatype = atts.get(RDF.datatype, None)
+ language = current.language
+ if datatype is not None:
+ # TODO: check that there are no atts other than datatype and id
+ datatype = absolutize(datatype)
+ else:
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ elif att in PROPERTY_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
+ self.error("""Invalid property attribute URI: %s""" % att)
+ else:
+ predicate = absolutize(att)
+
+ if att==RDF.type:
+ o = URIRef(atts[att])
+ else:
+ if datatype is not None:
+ language = None
+ o = Literal(atts[att], language, datatype)
+
+ if object is None:
+ object = BNode()
+ self.store.add((object, predicate, o))
+ if object is None:
+ current.data = ""
+ current.object = None
+ else:
+ current.data = None
+ current.object = object
+
+ def property_element_char(self, data):
+ current = self.current
+ if current.data is not None:
+ current.data += data
+
+ def property_element_end(self, name, qname):
+ current = self.current
+ if current.data is not None and current.object is None:
+ literalLang = current.language
+ if current.datatype is not None:
+ literalLang = None
+ current.object = Literal(current.data, literalLang, current.datatype)
+ current.data = None
+ if self.next.end==self.list_node_element_end:
+ if current.object!=RDF.nil:
+ self.store.add((current.list, RDF.rest, RDF.nil))
+ if current.object is not None:
+ self.store.add((self.parent.subject, current.predicate, current.object))
+ if current.id is not None:
+ self.add_reified(current.id, (self.parent.subject,
+ current.predicate, current.object))
+ current.subject = None
+
+ def list_node_element_end(self, name, qname):
+ current = self.current
+ if self.parent.list==RDF.nil:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.parent.list = list
+ self.store.add((self.parent.list, RDF.first, current.subject))
+ self.parent.object = list
+ self.parent.char = None
+ else:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.store.add((self.parent.list, RDF.rest, list))
+ self.store.add((list, RDF.first, current.subject))
+ self.parent.list = list
+
+ def literal_element_start(self, name, qname, attrs):
+ current = self.current
+ self.next.start = self.literal_element_start
+ self.next.char = self.literal_element_char
+ self.next.end = self.literal_element_end
+ current.declared = self.parent.declared.copy()
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ current.object = "<%s:%s" % (prefix, name[1])
+ else:
+ current.object = "<%s" % name[1]
+ if not name[0] in current.declared:
+ current.declared[name[0]] = prefix
+ if prefix:
+ current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
+ else:
+ current.object += (' xmlns="%s"' % name[0])
+ else:
+ current.object = "<%s" % name[1]
+
+ for (name, value) in attrs.items():
+ if name[0]:
+ if not name[0] in current.declared:
+ current.declared[name[0]] = self._current_context[name[0]]
+ name = current.declared[name[0]] + ":" + name[1]
+ else:
+ name = name[1]
+ current.object += (' %s=%s' % (name, quoteattr(value)))
+ current.object += ">"
+
+ def literal_element_char(self, data):
+ self.current.object += escape(data)
+
+ def literal_element_end(self, name, qname):
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ end = u"</%s:%s>" % (prefix, name[1])
+ else:
+ end = u"</%s>" % name[1]
+ else:
+ end = u"</%s>" % name[1]
+ self.parent.object += self.current.object + end
+
+
+def create_parser(target, store):
+ parser = make_parser()
+ try:
+ # Workaround for bug in expatreader.py. Needed when
+ # expatreader is trying to guess a prefix.
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
+ except AttributeError:
+ pass # Not present in Jython (at least)
+ parser.setFeature(handler.feature_namespaces, 1)
+ rdfxml = RDFXMLHandler(store)
+ rdfxml.setDocumentLocator(target)
+ #rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
+ parser.setContentHandler(rdfxml)
+ parser.setErrorHandler(ErrorHandler())
+ return parser
+
+
+class RDFXMLParser(Parser):
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, sink, **args):
+ self._parser = create_parser(source, sink)
+ content_handler = self._parser.getContentHandler()
+ preserve_bnode_ids = args.get("preserve_bnode_ids", None)
+ if preserve_bnode_ids is not None:
+ content_handler.preserve_bnode_ids = preserve_bnode_ids
+ # We're only using it once now
+ #content_handler.reset()
+ #self._parser.reset()
+ self._parser.parse(source)
+
+
+