Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py')
-rw-r--r-- creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py 579
1 files changed, 0 insertions, 579 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
deleted file mode 100644
index 00e8d6a..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
+++ /dev/null
@@ -1,579 +0,0 @@
-# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-#
-# * Neither the name of Daniel Krech nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-"""
-from xml.sax import make_parser
-from xml.sax.handler import ErrorHandler
-from xml.sax.saxutils import handler, quoteattr, escape
-from urlparse import urljoin, urldefrag
-
-from rdflib.namespace import RDF, is_ncname
-from rdflib.term import URIRef
-from rdflib.term import BNode
-from rdflib.term import Literal
-from rdflib.exceptions import ParserError, Error
-from rdflib.parser import Parser
-
-__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser']
-
-RDFNS = RDF
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
-# A mapping from unqualified terms to their qualified versions.
-UNQUALIFIED = {"about" : RDF.about,
- "ID" : RDF.ID,
- "type" : RDF.type,
- "resource": RDF.resource,
- "parseType": RDF.parseType}
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
-CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
-SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
-OLD_TERMS = [
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
-
-NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
-NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
-
-PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
-PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
-PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
-
-XMLNS = "http://www.w3.org/XML/1998/namespace"
-BASE = (XMLNS, "base")
-LANG = (XMLNS, "lang")
-
-
-class BagID(URIRef):
- __slots__ = ['li']
- def __init__(self, val):
- super(URIRef, self).__init__(val)
- self.li = 0
-
- def next_li(self):
- self.li += 1
- return RDFNS[self.li]
-
-
-class ElementHandler(object):
- __slots__ = ['start', 'char', 'end', 'li', 'id',
- 'base', 'subject', 'predicate', 'object',
- 'list', 'language', 'datatype', 'declared', 'data']
- def __init__(self):
- self.start = None
- self.char = None
- self.end = None
- self.li = 0
- self.id = None
- self.base = None
- self.subject = None
- self.object = None
- self.list = None
- self.language = None
- self.datatype = None
- self.declared = None
- self.data = None
-
- def next_li(self):
- self.li += 1
- return RDFNS[self.li]
-
-
-class RDFXMLHandler(handler.ContentHandler):
-
- def __init__(self, store):
- self.store = store
- self.preserve_bnode_ids = False
- self.reset()
-
- def reset(self):
- document_element = ElementHandler()
- document_element.start = self.document_element_start
- document_element.end = lambda name, qname: None
- self.stack = [None, document_element,]
- self.ids = {} # remember IDs we have already seen
- self.bnode = {}
- self._ns_contexts = [{}] # contains uri -> prefix dicts
- self._current_context = self._ns_contexts[-1]
-
- # ContentHandler methods
-
- def setDocumentLocator(self, locator):
- self.locator = locator
-
- def startDocument(self):
- pass
-
- def startPrefixMapping(self, prefix, namespace):
- self._ns_contexts.append(self._current_context.copy())
- self._current_context[namespace] = prefix
- self.store.bind(prefix, URIRef(namespace), override=False)
-
- def endPrefixMapping(self, prefix):
- self._current_context = self._ns_contexts[-1]
- del self._ns_contexts[-1]
-
- def startElementNS(self, name, qname, attrs):
- stack = self.stack
- stack.append(ElementHandler())
- current = self.current
- parent = self.parent
- base = attrs.get(BASE, None)
- if base is not None:
- base, frag = urldefrag(base)
- if parent and parent.base:
- base = urljoin(parent.base, base)
- else:
- systemId = self.locator.getPublicId() or self.locator.getSystemId()
- if systemId:
- base = urljoin(systemId, base)
- else:
- if parent:
- base = parent.base
- if base is None:
- systemId = self.locator.getPublicId() or self.locator.getSystemId()
- if systemId:
- base, frag = urldefrag(systemId)
- current.base = base
- language = attrs.get(LANG, None)
- if language is None:
- if parent:
- language = parent.language
- current.language = language
- current.start(name, qname, attrs)
-
- def endElementNS(self, name, qname):
- self.current.end(name, qname)
- self.stack.pop()
-
- def characters(self, content):
- char = self.current.char
- if char:
- char(content)
-
- def ignorableWhitespace(self, content):
- pass
-
- def processingInstruction(self, target, data):
- pass
-
- def add_reified(self, sid, (s, p, o)):
- self.store.add((sid, RDF.type, RDF.Statement))
- self.store.add((sid, RDF.subject, s))
- self.store.add((sid, RDF.predicate, p))
- self.store.add((sid, RDF.object, o))
-
- def error(self, message):
- locator = self.locator
- info = "%s:%s:%s: " % (locator.getSystemId(),
- locator.getLineNumber(), locator.getColumnNumber())
- raise ParserError(info + message)
-
- def get_current(self):
- return self.stack[-2]
- # Create a read only property called current so that self.current
- # gives the current element handler.
- current = property(get_current)
-
- def get_next(self):
- return self.stack[-1]
- # Create a read only property that gives the element handler to be
- # used for the next element.
- next = property(get_next)
-
- def get_parent(self):
- return self.stack[-3]
- # Create a read only property that gives the current parent
- # element handler
- parent = property(get_parent)
-
- def absolutize(self, uri):
- result = urljoin(self.current.base, uri, allow_fragments=1)
- if uri and uri[-1]=="#" and result[-1]!="#":
- result = "%s#" % result
- return URIRef(result)
-
- def convert(self, name, qname, attrs):
- if name[0] is None:
- name = URIRef(name[1])
- else:
- name = URIRef("".join(name))
- atts = {}
- for (n, v) in attrs.items(): #attrs._attrs.iteritems(): #
- if n[0] is None:
- att = URIRef(n[1])
- else:
- att = URIRef("".join(n))
- if att.startswith(XMLNS) or att[0:3].lower()=="xml":
- pass
- elif att in UNQUALIFIED:
- #if not RDFNS[att] in atts:
- atts[RDFNS[att]] = v
- else:
- atts[URIRef(att)] = v
- return name, atts
-
- def document_element_start(self, name, qname, attrs):
- if name[0] and URIRef("".join(name)) == RDF.RDF:
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- next.start = self.node_element_start
- next.end = self.node_element_end
- else:
- self.node_element_start(name, qname, attrs)
- #self.current.end = self.node_element_end
- # TODO... set end to something that sets start such that
- # another element will cause error
-
-
- def node_element_start(self, name, qname, attrs):
- name, atts = self.convert(name, qname, attrs)
- current = self.current
- absolutize = self.absolutize
-
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- next.start = self.property_element_start
- next.end = self.property_element_end
-
- if name in NODE_ELEMENT_EXCEPTIONS:
- self.error("Invalid node element URI: %s" % name)
-
- if RDF.ID in atts:
- if RDF.about in atts or RDF.nodeID in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
-
- id = atts[RDF.ID]
- if not is_ncname(id):
- self.error("rdf:ID value is not a valid NCName: %s" % id)
- subject = absolutize("#%s" % id)
- if subject in self.ids:
- self.error("two elements cannot use the same ID: '%s'" % subject)
- self.ids[subject] = 1 # IDs can only appear once within a document
- elif RDF.nodeID in atts:
- if RDF.ID in atts or RDF.about in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
- nodeID = atts[RDF.nodeID]
- if not is_ncname(nodeID):
- self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
- if self.preserve_bnode_ids is False:
- if nodeID in self.bnode:
- subject = self.bnode[nodeID]
- else:
- subject = BNode()
- self.bnode[nodeID] = subject
- else:
- subject = BNode(nodeID)
- elif RDF.about in atts:
- if RDF.ID in atts or RDF.nodeID in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
- subject = absolutize(atts[RDF.about])
- else:
- subject = BNode()
-
- if name!=RDF.Description: # S1
- self.store.add((subject, RDF.type, absolutize(name)))
-
- language = current.language
- for att in atts:
- if not att.startswith(str(RDFNS)):
- predicate = absolutize(att)
- try:
- object = Literal(atts[att], language)
- except Error, e:
- self.error(e.msg)
- elif att==RDF.type: #S2
- predicate = RDF.type
- object = absolutize(atts[RDF.type])
- elif att in NODE_ELEMENT_ATTRIBUTES:
- continue
- elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
- self.error("Invalid property attribute URI: %s" % att)
- continue # for when error does not throw an exception
- else:
- predicate = absolutize(att)
- try:
- object = Literal(atts[att], language)
- except Error, e:
- self.error(e.msg)
- self.store.add((subject, predicate, object))
-
- current.subject = subject
-
-
- def node_element_end(self, name, qname):
- self.parent.object = self.current.subject
-
- def property_element_start(self, name, qname, attrs):
- name, atts = self.convert(name, qname, attrs)
- current = self.current
- absolutize = self.absolutize
-
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- object = None
- current.data = None
- current.list = None
-
- if not name.startswith(str(RDFNS)):
- current.predicate = absolutize(name)
- elif name==RDF.li:
- current.predicate = current.next_li()
- elif name in PROPERTY_ELEMENT_EXCEPTIONS:
- self.error("Invalid property element URI: %s" % name)
- else:
- current.predicate = absolutize(name)
-
- id = atts.get(RDF.ID, None)
- if id is not None:
- if not is_ncname(id):
- self.error("rdf:ID value is not a value NCName: %s" % id)
- current.id = absolutize("#%s" % id)
- else:
- current.id = None
-
- resource = atts.get(RDF.resource, None)
- nodeID = atts.get(RDF.nodeID, None)
- parse_type = atts.get(RDF.parseType, None)
- if resource is not None and nodeID is not None:
- self.error("Property element cannot have both rdf:nodeID and rdf:resource")
- if resource is not None:
- object = absolutize(resource)
- next.start = self.node_element_start
- next.end = self.node_element_end
- elif nodeID is not None:
- if not is_ncname(nodeID):
- self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
- if self.preserve_bnode_ids is False:
- if nodeID in self.bnode:
- object = self.bnode[nodeID]
- else:
- subject = BNode()
- self.bnode[nodeID] = subject
- object = subject
- else:
- object = subject = BNode(nodeID)
- next.start = self.node_element_start
- next.end = self.node_element_end
- else:
- if parse_type is not None:
- for att in atts:
- if att!=RDF.parseType and att!=RDF.ID:
- self.error("Property attr '%s' now allowed here" % att)
- if parse_type=="Resource":
- current.subject = object = BNode()
- current.char = self.property_element_char
- next.start = self.property_element_start
- next.end = self.property_element_end
- elif parse_type=="Collection":
- current.char = None
- object = current.list = RDF.nil #BNode()#self.parent.subject
- next.start = self.node_element_start
- next.end = self.list_node_element_end
- else: #if parse_type=="Literal":
- # All other values are treated as Literal
- # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
- object = Literal("", datatype=RDF.XMLLiteral)
- current.char = self.literal_element_char
- current.declared = {}
- next.start = self.literal_element_start
- next.char = self.literal_element_char
- next.end = self.literal_element_end
- current.object = object
- return
- else:
- object = None
- current.char = self.property_element_char
- next.start = self.node_element_start
- next.end = self.node_element_end
-
- datatype = current.datatype = atts.get(RDF.datatype, None)
- language = current.language
- if datatype is not None:
- # TODO: check that there are no atts other than datatype and id
- datatype = absolutize(datatype)
- else:
- for att in atts:
- if not att.startswith(str(RDFNS)):
- predicate = absolutize(att)
- elif att in PROPERTY_ELEMENT_ATTRIBUTES:
- continue
- elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
- self.error("""Invalid property attribute URI: %s""" % att)
- else:
- predicate = absolutize(att)
-
- if att==RDF.type:
- o = URIRef(atts[att])
- else:
- if datatype is not None:
- language = None
- o = Literal(atts[att], language, datatype)
-
- if object is None:
- object = BNode()
- self.store.add((object, predicate, o))
- if object is None:
- current.data = ""
- current.object = None
- else:
- current.data = None
- current.object = object
-
- def property_element_char(self, data):
- current = self.current
- if current.data is not None:
- current.data += data
-
- def property_element_end(self, name, qname):
- current = self.current
- if current.data is not None and current.object is None:
- literalLang = current.language
- if current.datatype is not None:
- literalLang = None
- current.object = Literal(current.data, literalLang, current.datatype)
- current.data = None
- if self.next.end==self.list_node_element_end:
- if current.object!=RDF.nil:
- self.store.add((current.list, RDF.rest, RDF.nil))
- if current.object is not None:
- self.store.add((self.parent.subject, current.predicate, current.object))
- if current.id is not None:
- self.add_reified(current.id, (self.parent.subject,
- current.predicate, current.object))
- current.subject = None
-
- def list_node_element_end(self, name, qname):
- current = self.current
- if self.parent.list==RDF.nil:
- list = BNode()
- # Removed between 20030123 and 20030905
- #self.store.add((list, RDF.type, LIST))
- self.parent.list = list
- self.store.add((self.parent.list, RDF.first, current.subject))
- self.parent.object = list
- self.parent.char = None
- else:
- list = BNode()
- # Removed between 20030123 and 20030905
- #self.store.add((list, RDF.type, LIST))
- self.store.add((self.parent.list, RDF.rest, list))
- self.store.add((list, RDF.first, current.subject))
- self.parent.list = list
-
- def literal_element_start(self, name, qname, attrs):
- current = self.current
- self.next.start = self.literal_element_start
- self.next.char = self.literal_element_char
- self.next.end = self.literal_element_end
- current.declared = self.parent.declared.copy()
- if name[0]:
- prefix = self._current_context[name[0]]
- if prefix:
- current.object = "<%s:%s" % (prefix, name[1])
- else:
- current.object = "<%s" % name[1]
- if not name[0] in current.declared:
- current.declared[name[0]] = prefix
- if prefix:
- current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
- else:
- current.object += (' xmlns="%s"' % name[0])
- else:
- current.object = "<%s" % name[1]
-
- for (name, value) in attrs.items():
- if name[0]:
- if not name[0] in current.declared:
- current.declared[name[0]] = self._current_context[name[0]]
- name = current.declared[name[0]] + ":" + name[1]
- else:
- name = name[1]
- current.object += (' %s=%s' % (name, quoteattr(value)))
- current.object += ">"
-
- def literal_element_char(self, data):
- self.current.object += escape(data)
-
- def literal_element_end(self, name, qname):
- if name[0]:
- prefix = self._current_context[name[0]]
- if prefix:
- end = u"</%s:%s>" % (prefix, name[1])
- else:
- end = u"</%s>" % name[1]
- else:
- end = u"</%s>" % name[1]
- self.parent.object += self.current.object + end
-
-
-def create_parser(target, store):
- parser = make_parser()
- try:
- # Workaround for bug in expatreader.py. Needed when
- # expatreader is trying to guess a prefix.
- parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
- except AttributeError:
- pass # Not present in Jython (at least)
- parser.setFeature(handler.feature_namespaces, 1)
- rdfxml = RDFXMLHandler(store)
- rdfxml.setDocumentLocator(target)
- #rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
- parser.setContentHandler(rdfxml)
- parser.setErrorHandler(ErrorHandler())
- return parser
-
-
-class RDFXMLParser(Parser):
-
- def __init__(self):
- pass
-
- def parse(self, source, sink, **args):
- self._parser = create_parser(source, sink)
- content_handler = self._parser.getContentHandler()
- preserve_bnode_ids = args.get("preserve_bnode_ids", None)
- if preserve_bnode_ids is not None:
- content_handler.preserve_bnode_ids = preserve_bnode_ids
- # We're only using it once now
- #content_handler.reset()
- #self._parser.reset()
- self._parser.parse(source)
-
-
-