Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib_/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib_/plugins')
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/__init__.py7
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/memory.py563
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/memory.py~563
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py3
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py2314
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py~2314
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py107
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~107
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py28
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~28
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py243
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~243
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py168
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~168
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py36
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~36
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py180
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~180
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py173
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~173
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py200
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~200
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py434
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py~434
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py0
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py27
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~27
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py579
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~579
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py286
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~286
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py0
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py123
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~123
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py34
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~34
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py76
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py~76
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py282
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py~282
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py72
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~72
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py364
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~364
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py103
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py~103
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/sleepycat.py531
-rw-r--r--creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~531
48 files changed, 13856 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib_/plugins/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/__init__.py
new file mode 100644
index 0000000..4622bb0
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/__init__.py
@@ -0,0 +1,7 @@
+"""
+Default plugins for rdflib.
+
+This is a namespace package and contains the default plugins for
+rdflib.
+
+"""
diff --git a/creactistore/_templates/lib/rdflib_/plugins/memory.py b/creactistore/_templates/lib/rdflib_/plugins/memory.py
new file mode 100644
index 0000000..a9d6fad
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/memory.py
@@ -0,0 +1,563 @@
+from __future__ import generators
+from rdflib_.term import BNode
+from rdflib_.store import Store, NO_STORE, VALID_STORE
+
# Public API of this module.
__all__ = ['Memory', 'IOMemory']

# Wildcard marker used in triple patterns: None matches any term.
ANY = Any = None
+
class Memory(Store):
    """An in-memory implementation of a triple store.

    Triples are stored redundantly in three nested dictionaries so that
    any combination of bound/unbound terms can be matched efficiently:
    spo[s][p][o] = 1, pos[p][o][s] = 1 and osp[o][s][p] = 1.

    This store is not context aware: ``context`` arguments are accepted
    for Store interface compatibility but ignored.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """

    def __init__(self, configuration=None, identifier=None):
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}
        # indexed by [predicate][object][subject]
        self.__pos = {}
        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix <-> namespace maps for bind()/namespace()/prefix()
        self.__namespace = {}
        self.__prefix = {}

    def add(self, triple, context, quoted=False):
        """Add a triple to the store of triples.

        ``triple`` is a (subject, predicate, object) tuple; ``context``
        and ``quoted`` are ignored by this non-context-aware store.
        """
        subject, predicate, object = triple
        # Create the nested dictionaries where they do not yet exist.
        # setdefault replaces the original bare try/except blocks, which
        # also hid unrelated errors (e.g. unhashable terms).
        self.__spo.setdefault(subject, {}).setdefault(predicate, {})[object] = 1
        self.__pos.setdefault(predicate, {}).setdefault(object, {})[subject] = 1
        self.__osp.setdefault(object, {}).setdefault(subject, {})[predicate] = 1

    def remove(self, triple, context=None):
        """Remove every triple matching the (possibly wildcarded) pattern."""
        # Materialize the matches first so deletions cannot interfere
        # with the generator that produces them.
        for (subject, predicate, object), c in list(self.triples(triple)):
            del self.__spo[subject][predicate][object]
            del self.__pos[predicate][object][subject]
            del self.__osp[object][subject][predicate]

    def triples(self, triple, context=None):
        """Generate ((s, p, o), contexts) pairs for all matching triples.

        Each term of ``triple`` may be ANY (None) to leave it unbound;
        the most selective index is chosen from the bound terms.
        """
        subject, predicate, object = triple
        if subject != ANY:  # subject is given
            subjectDictionary = self.__spo.get(subject)
            if subjectDictionary is None:
                return  # given subject not found
            if predicate != ANY:  # subject+predicate is given
                objectDictionary = subjectDictionary.get(predicate)
                if objectDictionary is None:
                    return  # given predicate not found
                if object != ANY:  # fully bound pattern
                    if object in objectDictionary:
                        yield (subject, predicate, object), self.__contexts()
                else:  # object unbound
                    for o in list(objectDictionary.keys()):
                        yield (subject, predicate, o), self.__contexts()
            else:  # subject given, predicate unbound
                for p in list(subjectDictionary.keys()):
                    if object != ANY:  # object is given
                        if object in subjectDictionary[p]:
                            yield (subject, p, object), self.__contexts()
                    else:  # object unbound
                        for o in list(subjectDictionary[p].keys()):
                            yield (subject, p, o), self.__contexts()
        elif predicate != ANY:  # predicate is given, subject unbound
            predicateDictionary = self.__pos.get(predicate)
            if predicateDictionary is None:
                return  # given predicate not found
            if object != ANY:  # predicate+object given
                subjects = predicateDictionary.get(object)
                if subjects is None:
                    return  # given object not found
                for s in list(subjects.keys()):
                    yield (s, predicate, object), self.__contexts()
            else:  # predicate given, subject+object unbound
                for o in list(predicateDictionary.keys()):
                    for s in list(predicateDictionary[o].keys()):
                        yield (s, predicate, o), self.__contexts()
        elif object != ANY:  # only object is given
            objectDictionary = self.__osp.get(object)
            if objectDictionary is None:
                return  # given object not found
            for s in list(objectDictionary.keys()):
                for p in list(objectDictionary[s].keys()):
                    yield (s, p, object), self.__contexts()
        else:  # fully unbound pattern: enumerate everything
            for s in list(self.__spo.keys()):
                subjectDictionary = self.__spo[s]
                for p in list(subjectDictionary.keys()):
                    for o in list(subjectDictionary[p].keys()):
                        yield (s, p, o), self.__contexts()

    def __len__(self, context=None):
        # TODO: keep a running count instead of scanning every triple.
        return sum(1 for _ in self.triples((None, None, None)))

    def bind(self, prefix, namespace):
        # Register a bidirectional prefix <-> namespace mapping.
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        # Namespace bound to ``prefix``, or None if unbound.
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        # Prefix bound to ``namespace``, or None if unbound.
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every (prefix, namespace) binding."""
        # items() works on Python 2 and 3; iteritems() was Python-2 only.
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        # Not context aware: always an empty iterator of contexts.
        return iter(())
+
class IOMemory(Store):
    """\
    An integer-key-optimized-context-aware-in-memory store.

    Uses nested dictionaries to store triples and context. Each triple
    is stored in six such indices as follows cspo[c][s][p][o] = 1
    and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as
    spo[s][p][o] = [c] and pos[p][o][s] = [c] and osp[o][s][p] = [c]

    Identifiers (terms and contexts) are mapped to random integer keys
    via the ``forward``/``reverse`` maps; the indices hold only ints.

    Context information is used to track the 'source' of the triple
    data for merging, unmerging, remerging purposes. context aware
    store stores consume more memory size than non context stores.

    """

    context_aware = True
    formula_aware = True

    def __init__(self, configuration=None, identifier=None):
        # NOTE(review): ``configuration`` is accepted but not forwarded
        # to Store.__init__ — confirm this is intentional.
        super(IOMemory, self).__init__()

        # indexed by [context][subject][predicate][object] = 1
        self.cspo = self.createIndex()

        # indexed by [context][predicate][object][subject] = 1
        self.cpos = self.createIndex()

        # indexed by [context][object][subject][predicate] = 1
        self.cosp = self.createIndex()

        # indexed by [subject][predicate][object] = [context]
        self.spo = self.createIndex()

        # indexed by [predicate][object][subject] = [context]
        self.pos = self.createIndex()

        # indexed by [object][subject][predicate] = [context]
        self.osp = self.createIndex()

        # indexes integer keys to identifiers
        self.forward = self.createForward()

        # reverse index of forward
        self.reverse = self.createReverse()

        self.identifier = identifier or BNode()

        # prefix <-> namespace maps for bind()/namespace()/prefix()
        self.__namespace = self.createPrefixMap()
        self.__prefix = self.createPrefixMap()

    def open(self, configuration, create=False):
        """Report NO_STORE unless ``create`` is set: an in-memory store
        never already exists."""
        if not create:
            # An IOMemory Store never exists.
            return NO_STORE
        else:
            return VALID_STORE

    def bind(self, prefix, namespace):
        # Register a bidirectional prefix <-> namespace mapping.
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        # Namespace bound to ``prefix``, or None if unbound.
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        # Prefix bound to ``namespace``, or None if unbound.
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every (prefix, namespace) binding (Python-2 iteritems)."""
        for prefix, namespace in self.__namespace.iteritems():
            yield prefix, namespace

    def defaultContext(self):
        # NOTE(review): ``default_context`` is never assigned anywhere in
        # this class, so this would raise AttributeError — confirm it is
        # set by a subclass or caller before use.
        return self.default_context

    def addContext(self, context):
        """ Add context w/o adding statement. Dan you can remove this if you want """
        # NOTE(review): dict has no .insert() method, so the while test
        # below would raise AttributeError if this ever runs — confirm
        # this method is unused before relying on it.
        if not self.reverse.has_key(context):
            ci=randid()
            while not self.forward.insert(ci, context):
                ci=randid()
            self.reverse[context] = ci

    def intToIdentifier(self, (si, pi, oi)):
        """ Resolve an integer triple into identifiers. """
        return (self.forward[si], self.forward[pi], self.forward[oi])

    def identifierToInt(self, (s, p, o)):
        """ Resolve an identifier triple into integers.

        Raises KeyError if any term is not already interned.
        """
        return (self.reverse[s], self.reverse[p], self.reverse[o])

    def uniqueSubjects(self, context=None):
        """Yield each distinct subject, store-wide or for one context."""
        # NOTE(review): cspo is keyed by the integer context key (ci),
        # not the context identifier itself — confirm callers pass ci.
        if context is None:
            index = self.spo
        else:
            index = self.cspo[context]
        for si in index.keys():
            yield self.forward[si]

    def uniquePredicates(self, context=None):
        """Yield each distinct predicate, store-wide or for one context."""
        if context is None:
            index = self.pos
        else:
            index = self.cpos[context]
        for pi in index.keys():
            yield self.forward[pi]

    def uniqueObjects(self, context=None):
        """Yield each distinct object, store-wide or for one context."""
        if context is None:
            index = self.osp
        else:
            index = self.cosp[context]
        for oi in index.keys():
            yield self.forward[oi]

    def createForward(self):
        # Factory for the int -> identifier map (overridable hook).
        return {}

    def createReverse(self):
        # Factory for the identifier -> int map (overridable hook).
        return {}

    def createIndex(self):
        # Factory for one level of a nested triple index (overridable hook).
        return {}

    def createPrefixMap(self):
        # Factory for the prefix/namespace maps (overridable hook).
        return {}

    def add(self, triple, context, quoted=False):
        """\
        Add a triple to the store.

        Interns any new terms/context as random integer keys, then
        records the triple in all six nested indices (the context-less
        spo/pos/osp indices are skipped for quoted/formula triples).
        """
        Store.add(self, triple, context, quoted)
        for triple, cg in self.triples(triple, context):
            #triple is already in the store.
            return

        subject, predicate, object = triple

        f = self.forward
        r = self.reverse

        # assign keys for new identifiers

        if not r.has_key(subject):
            si=randid()
            while f.has_key(si):
                si=randid()
            f[si] = subject
            r[subject] = si
        else:
            si = r[subject]

        if not r.has_key(predicate):
            pi=randid()
            while f.has_key(pi):
                pi=randid()
            f[pi] = predicate
            r[predicate] = pi
        else:
            pi = r[predicate]

        if not r.has_key(object):
            oi=randid()
            while f.has_key(oi):
                oi=randid()
            f[oi] = object
            r[object] = oi
        else:
            oi = r[object]

        if not r.has_key(context):
            ci=randid()
            while f.has_key(ci):
                ci=randid()
            f[ci] = context
            r[context] = ci
        else:
            ci = r[context]

        # add dictionary entries for cspo[c][s][p][o] = 1,
        # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the
        # nested {} where they do not yet exist.
        self._setNestedIndex(self.cspo, ci, si, pi, oi)
        self._setNestedIndex(self.cpos, ci, pi, oi, si)
        self._setNestedIndex(self.cosp, ci, oi, si, pi)

        if not quoted:
            # Quoted (formula) triples are tracked only per-context.
            self._setNestedIndex(self.spo, si, pi, oi, ci)
            self._setNestedIndex(self.pos, pi, oi, si, ci)
            self._setNestedIndex(self.osp, oi, si, pi, ci)

    def _setNestedIndex(self, index, *keys):
        # Walk/create nested dicts for all but the last key, then mark
        # the final key present.
        for key in keys[:-1]:
            if not index.has_key(key):
                index[key] = self.createIndex()
            index = index[key]
        index[keys[-1]] = 1


    def _removeNestedIndex(self, index, *keys):
        """ Remove context from the list of contexts in a nested index.

        Afterwards, recursively remove nested indexes when they became empty.
        """
        parents = []
        for key in keys[:-1]:
            parents.append(index)
            index = index[key]
        del index[keys[-1]]

        # Prune now-empty levels from the innermost outwards.
        n = len(parents)
        for i in xrange(n):
            index = parents[n-1-i]
            key = keys[n-1-i]
            if len(index[key]) == 0:
                del index[key]

    def remove(self, triple, context=None):
        """Remove all matching triples; a fully-wildcard pattern plus a
        context also drops that context's indices entirely."""
        Store.remove(self, triple, context)
        if context is not None:
            if context == self:
                context = None

        f = self.forward
        r = self.reverse
        if context is None:
            # No context: remove the triple from every context it is in.
            for triple, cg in self.triples(triple):
                subject, predicate, object = triple
                si, pi, oi = self.identifierToInt((subject, predicate, object))
                contexts = list(self.contexts(triple))
                for context in contexts:
                    ci = r[context]
                    del self.cspo[ci][si][pi][oi]
                    del self.cpos[ci][pi][oi][si]
                    del self.cosp[ci][oi][si][pi]

                    self._removeNestedIndex(self.spo, si, pi, oi, ci)
                    self._removeNestedIndex(self.pos, pi, oi, si, ci)
                    self._removeNestedIndex(self.osp, oi, si, pi, ci)
                # grr!! hafta ref-count these before you can collect them dumbass!
                #del f[si], f[pi], f[oi]
                #del r[subject], r[predicate], r[object]
        else:
            subject, predicate, object = triple
            ci = r.get(context, None)
            if ci:
                for triple, cg in self.triples(triple, context):
                    si, pi, oi = self.identifierToInt(triple)
                    del self.cspo[ci][si][pi][oi]
                    del self.cpos[ci][pi][oi][si]
                    del self.cosp[ci][oi][si][pi]

                    try:
                        self._removeNestedIndex(self.spo, si, pi, oi, ci)
                        self._removeNestedIndex(self.pos, pi, oi, si, ci)
                        self._removeNestedIndex(self.osp, oi, si, pi, ci)
                    except KeyError:
                        # the context may be a quoted one in which
                        # there will not be a triple in spo, pos or
                        # osp. So ignore any KeyErrors
                        pass
                # TODO delete references to resources in self.forward/self.reverse
                # that are not in use anymore...

            if subject is None and predicate is None and object is None:
                # remove context
                try:
                    ci = self.reverse[context]
                    del self.cspo[ci], self.cpos[ci], self.cosp[ci]
                except KeyError:
                    # TODO: no exception when removing non-existant context?
                    pass


    def triples(self, triple, context=None):
        """A generator over all the triples matching """

        if context is not None:
            if context == self:
                context = None

        subject, predicate, object = triple
        ci = si = pi = oi = Any

        # Pick the per-context or global indices, then translate the
        # bound terms to their integer keys; an unknown term or context
        # means no matches at all.
        if context is None:
            spo = self.spo
            pos = self.pos
            osp = self.osp
        else:
            try:
                ci = self.reverse[context] # TODO: Really ignore keyerror here
                spo = self.cspo[ci]
                pos = self.cpos[ci]
                osp = self.cosp[ci]
            except KeyError:
                return
        try:
            if subject is not Any:
                si = self.reverse[subject] # throws keyerror if subject doesn't exist ;(
            if predicate is not Any:
                pi = self.reverse[predicate]
            if object is not Any:
                oi = self.reverse[object]
        except KeyError, e:
            return #raise StopIteration

        if si != Any: # subject is given
            if spo.has_key(si):
                subjectDictionary = spo[si]
                if pi != Any: # subject+predicate is given
                    if subjectDictionary.has_key(pi):
                        if oi!= Any: # subject+predicate+object is given
                            if subjectDictionary[pi].has_key(oi):
                                ss, pp, oo = self.intToIdentifier((si, pi, oi))
                                yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
                            else: # given object not found
                                pass
                    else: # subject+predicate is given, object unbound
                        for o in subjectDictionary[pi].keys():
                            ss, pp, oo = self.intToIdentifier((si, pi, o))
                            yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
                else: # given predicate not found
                    pass
            else: # subject given, predicate unbound
                for p in subjectDictionary.keys():
                    if oi != Any: # object is given
                        if subjectDictionary[p].has_key(oi):
                            ss, pp, oo = self.intToIdentifier((si, p, oi))
                            yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
                        else: # given object not found
                            pass
                    else: # object unbound
                        for o in subjectDictionary[p].keys():
                            ss, pp, oo = self.intToIdentifier((si, p, o))
                            yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
            else: # given subject not found
                pass
        elif pi != Any: # predicate is given, subject unbound
            if pos.has_key(pi):
                predicateDictionary = pos[pi]
                if oi != Any: # predicate+object is given, subject unbound
                    if predicateDictionary.has_key(oi):
                        for s in predicateDictionary[oi].keys():
                            ss, pp, oo = self.intToIdentifier((s, pi, oi))
                            yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
                    else: # given object not found
                        pass
                else: # predicate is given, object+subject unbound
                    for o in predicateDictionary.keys():
                        for s in predicateDictionary[o].keys():
                            ss, pp, oo = self.intToIdentifier((s, pi, o))
                            yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
        elif oi != Any: # object is given, subject+predicate unbound
            if osp.has_key(oi):
                objectDictionary = osp[oi]
                for s in objectDictionary.keys():
                    for p in objectDictionary[s].keys():
                        ss, pp, oo = self.intToIdentifier((s, p, oi))
                        yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
        else: # subject+predicate+object unbound
            for s in spo.keys():
                subjectDictionary = spo[s]
                for p in subjectDictionary.keys():
                    for o in subjectDictionary[p].keys():
                        ss, pp, oo = self.intToIdentifier((s, p, o))
                        yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))

    def __len__(self, context=None):
        """Count matching triples, store-wide or for one context."""
        if context is not None:
            if context == self:
                context = None

        # TODO: for eff. implementation
        count = 0
        for triple, cg in self.triples((Any, Any, Any), context):
            count += 1
        return count

    def contexts(self, triple=None):
        """With a triple, yield each context asserting it; otherwise
        yield every known context."""
        if triple:
            si, pi, oi = self.identifierToInt(triple)
            for ci in self.spo[si][pi][oi]:
                yield self.forward[ci]
        else:
            for ci in self.cspo.keys():
                yield self.forward[ci]
+
+
+
+
import random

def randid(randint=random.randint, choice=random.choice, signs=(-1,1)):
    """Return a random non-zero integer key in +/-[1, 2000000000].

    The random helpers are captured as default arguments so they remain
    usable after the module-level ``random`` name is deleted below.
    """
    sign = choice(signs)
    magnitude = randint(1, 2000000000)
    return sign * magnitude

del random
diff --git a/creactistore/_templates/lib/rdflib_/plugins/memory.py~ b/creactistore/_templates/lib/rdflib_/plugins/memory.py~
new file mode 100644
index 0000000..3a9d9f8
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/memory.py~
@@ -0,0 +1,563 @@
+from __future__ import generators
+from rdflib.term import BNode
+from rdflib.store import Store, NO_STORE, VALID_STORE
+
# Public API of this module.
__all__ = ['Memory', 'IOMemory']

# Wildcard marker used in triple patterns: None matches any term.
ANY = Any = None
+
class Memory(Store):
    """\
    An in memory implementation of a triple store.

    This triple store uses nested dictionaries to store triples. Each
    triple is stored in two such indices as follows spo[s][p][o] = 1 and
    pos[p][o][s] = 1.

    This store is not context aware: ``context`` arguments are ignored.

    Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
    """
    def __init__(self, configuration=None, identifier=None):
        super(Memory, self).__init__(configuration)
        self.identifier = identifier

        # indexed by [subject][predicate][object]
        self.__spo = {}

        # indexed by [predicate][object][subject]
        self.__pos = {}

        # indexed by [object][subject][predicate]
        self.__osp = {}

        # prefix <-> namespace maps for bind()/namespace()/prefix()
        self.__namespace = {}
        self.__prefix = {}

    def add(self, (subject, predicate, object), context, quoted=False):
        """\
        Add a triple to the store of triples.
        """
        # add dictionary entries for spo[s][p][o] = 1 and pos[p][o][s]
        # = 1, creating the nested dictionaries where they do not yet
        # exist.
        # NOTE(review): the bare excepts below should be
        # ``except KeyError:`` — as written they hide unrelated errors.
        spo = self.__spo
        try:
            po = spo[subject]
        except:
            po = spo[subject] = {}
        try:
            o = po[predicate]
        except:
            o = po[predicate] = {}
        o[object] = 1

        pos = self.__pos
        try:
            os = pos[predicate]
        except:
            os = pos[predicate] = {}
        try:
            s = os[object]
        except:
            s = os[object] = {}
        s[subject] = 1

        osp = self.__osp
        try:
            sp = osp[object]
        except:
            sp = osp[object] = {}
        try:
            p = sp[subject]
        except:
            p = sp[subject] = {}
        p[predicate] = 1

    def remove(self, (subject, predicate, object), context=None):
        # Delete every matching triple from all three indices; empty
        # inner dictionaries are left behind.
        for (subject, predicate, object), c in self.triples(
            (subject, predicate, object)):
            del self.__spo[subject][predicate][object]
            del self.__pos[predicate][object][subject]
            del self.__osp[object][subject][predicate]

    def triples(self, (subject, predicate, object), context=None):
        """A generator over all the triples matching """
        # Dispatch on which terms are bound, using the most selective
        # of the three indices.
        if subject!=ANY: # subject is given
            spo = self.__spo
            if subject in spo:
                subjectDictionary = spo[subject]
                if predicate!=ANY: # subject+predicate is given
                    if predicate in subjectDictionary:
                        if object!=ANY: # subject+predicate+object is given
                            if object in subjectDictionary[predicate]:
                                yield (subject, predicate, object), \
                                    self.__contexts()
                            else: # given object not found
                                pass
                        else: # subject+predicate is given, object unbound
                            for o in subjectDictionary[predicate].keys():
                                yield (subject, predicate, o), \
                                    self.__contexts()
                    else: # given predicate not found
                        pass
                else: # subject given, predicate unbound
                    for p in subjectDictionary.keys():
                        if object!=ANY: # object is given
                            if object in subjectDictionary[p]:
                                yield (subject, p, object), self.__contexts()
                            else: # given object not found
                                pass
                        else: # object unbound
                            for o in subjectDictionary[p].keys():
                                yield (subject, p, o), self.__contexts()
            else: # given subject not found
                pass
        elif predicate!=ANY: # predicate is given, subject unbound
            pos = self.__pos
            if predicate in pos:
                predicateDictionary = pos[predicate]
                if object!=ANY: # predicate+object is given, subject unbound
                    if object in predicateDictionary:
                        for s in predicateDictionary[object].keys():
                            yield (s, predicate, object), self.__contexts()
                    else: # given object not found
                        pass
                else: # predicate is given, object+subject unbound
                    for o in predicateDictionary.keys():
                        for s in predicateDictionary[o].keys():
                            yield (s, predicate, o), self.__contexts()
        elif object!=ANY: # object is given, subject+predicate unbound
            osp = self.__osp
            if object in osp:
                objectDictionary = osp[object]
                for s in objectDictionary.keys():
                    for p in objectDictionary[s].keys():
                        yield (s, p, object), self.__contexts()
        else: # subject+predicate+object unbound
            spo = self.__spo
            for s in spo.keys():
                subjectDictionary = spo[s]
                for p in subjectDictionary.keys():
                    for o in subjectDictionary[p].keys():
                        yield (s, p, o), self.__contexts()

    def __len__(self, context=None):
        #@@ optimize
        i = 0
        for triple in self.triples((None, None, None)):
            i += 1
        return i

    def bind(self, prefix, namespace):
        # Register a bidirectional prefix <-> namespace mapping.
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        # Namespace bound to ``prefix``, or None if unbound.
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        # Prefix bound to ``namespace``, or None if unbound.
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every (prefix, namespace) binding (Python-2 iteritems)."""
        for prefix, namespace in self.__namespace.iteritems():
            yield prefix, namespace

    def __contexts(self):
        # Not context aware: always an empty iterator of contexts.
        return (c for c in []) # TODO: best way to return empty generator
+
+class IOMemory(Store):
+ """\
+ An integer-key-optimized-context-aware-in-memory store.
+
+ Uses nested dictionaries to store triples and context. Each triple
+ is stored in six such indices as follows cspo[c][s][p][o] = 1
+ and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as
+ spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c]
+
+ Context information is used to track the 'source' of the triple
+ data for merging, unmerging, remerging purposes. context aware
+ store stores consume more memory size than non context stores.
+
+ """
+
+ context_aware = True
+ formula_aware = True
+
+ def __init__(self, configuration=None, identifier=None):
+ super(IOMemory, self).__init__()
+
+ # indexed by [context][subject][predicate][object] = 1
+ self.cspo = self.createIndex()
+
+ # indexed by [context][predicate][object][subject] = 1
+ self.cpos = self.createIndex()
+
+ # indexed by [context][object][subject][predicate] = 1
+ self.cosp = self.createIndex()
+
+ # indexed by [subject][predicate][object] = [context]
+ self.spo = self.createIndex()
+
+ # indexed by [predicate][object][subject] = [context]
+ self.pos = self.createIndex()
+
+ # indexed by [object][subject][predicate] = [context]
+ self.osp = self.createIndex()
+
+ # indexes integer keys to identifiers
+ self.forward = self.createForward()
+
+ # reverse index of forward
+ self.reverse = self.createReverse()
+
+ self.identifier = identifier or BNode()
+
+ self.__namespace = self.createPrefixMap()
+ self.__prefix = self.createPrefixMap()
+
+ def open(self, configuration, create=False):
+ if not create:
+ # An IOMemory Store never exists.
+ return NO_STORE
+ else:
+ return VALID_STORE
+
+ def bind(self, prefix, namespace):
+ self.__prefix[namespace] = prefix
+ self.__namespace[prefix] = namespace
+
+ def namespace(self, prefix):
+ return self.__namespace.get(prefix, None)
+
+ def prefix(self, namespace):
+ return self.__prefix.get(namespace, None)
+
+ def namespaces(self):
+ for prefix, namespace in self.__namespace.iteritems():
+ yield prefix, namespace
+
+ def defaultContext(self):
+ return self.default_context
+
+ def addContext(self, context):
+ """ Add context w/o adding statement. Dan you can remove this if you want """
+
+ if not self.reverse.has_key(context):
+ ci=randid()
+ while not self.forward.insert(ci, context):
+ ci=randid()
+ self.reverse[context] = ci
+
+ def intToIdentifier(self, (si, pi, oi)):
+ """ Resolve an integer triple into identifers. """
+ return (self.forward[si], self.forward[pi], self.forward[oi])
+
+ def identifierToInt(self, (s, p, o)):
+ """ Resolve an identifier triple into integers. """
+ return (self.reverse[s], self.reverse[p], self.reverse[o])
+
+ def uniqueSubjects(self, context=None):
+ if context is None:
+ index = self.spo
+ else:
+ index = self.cspo[context]
+ for si in index.keys():
+ yield self.forward[si]
+
+ def uniquePredicates(self, context=None):
+ if context is None:
+ index = self.pos
+ else:
+ index = self.cpos[context]
+ for pi in index.keys():
+ yield self.forward[pi]
+
+ def uniqueObjects(self, context=None):
+ if context is None:
+ index = self.osp
+ else:
+ index = self.cosp[context]
+ for oi in index.keys():
+ yield self.forward[oi]
+
+ def createForward(self):
+ return {}
+
+ def createReverse(self):
+ return {}
+
+ def createIndex(self):
+ return {}
+
+ def createPrefixMap(self):
+ return {}
+
+ def add(self, triple, context, quoted=False):
+ """\
+ Add a triple to the store.
+ """
+ Store.add(self, triple, context, quoted)
+ for triple, cg in self.triples(triple, context):
+ #triple is already in the store.
+ return
+
+ subject, predicate, object = triple
+
+ f = self.forward
+ r = self.reverse
+
+ # assign keys for new identifiers
+
+ if not r.has_key(subject):
+ si=randid()
+ while f.has_key(si):
+ si=randid()
+ f[si] = subject
+ r[subject] = si
+ else:
+ si = r[subject]
+
+ if not r.has_key(predicate):
+ pi=randid()
+ while f.has_key(pi):
+ pi=randid()
+ f[pi] = predicate
+ r[predicate] = pi
+ else:
+ pi = r[predicate]
+
+ if not r.has_key(object):
+ oi=randid()
+ while f.has_key(oi):
+ oi=randid()
+ f[oi] = object
+ r[object] = oi
+ else:
+ oi = r[object]
+
+ if not r.has_key(context):
+ ci=randid()
+ while f.has_key(ci):
+ ci=randid()
+ f[ci] = context
+ r[context] = ci
+ else:
+ ci = r[context]
+
+ # add dictionary entries for cspo[c][s][p][o] = 1,
+ # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the
+ # nested {} where they do not yet exits.
+ self._setNestedIndex(self.cspo, ci, si, pi, oi)
+ self._setNestedIndex(self.cpos, ci, pi, oi, si)
+ self._setNestedIndex(self.cosp, ci, oi, si, pi)
+
+ if not quoted:
+ self._setNestedIndex(self.spo, si, pi, oi, ci)
+ self._setNestedIndex(self.pos, pi, oi, si, ci)
+ self._setNestedIndex(self.osp, oi, si, pi, ci)
+
+ def _setNestedIndex(self, index, *keys):
+ for key in keys[:-1]:
+ if not index.has_key(key):
+ index[key] = self.createIndex()
+ index = index[key]
+ index[keys[-1]] = 1
+
+
+ def _removeNestedIndex(self, index, *keys):
+ """ Remove context from the list of contexts in a nested index.
+
+ Afterwards, recursively remove nested indexes when they became empty.
+ """
+ parents = []
+ for key in keys[:-1]:
+ parents.append(index)
+ index = index[key]
+ del index[keys[-1]]
+
+ n = len(parents)
+ for i in xrange(n):
+ index = parents[n-1-i]
+ key = keys[n-1-i]
+ if len(index[key]) == 0:
+ del index[key]
+
+ def remove(self, triple, context=None):
+ Store.remove(self, triple, context)
+ if context is not None:
+ if context == self:
+ context = None
+
+ f = self.forward
+ r = self.reverse
+ if context is None:
+ for triple, cg in self.triples(triple):
+ subject, predicate, object = triple
+ si, pi, oi = self.identifierToInt((subject, predicate, object))
+ contexts = list(self.contexts(triple))
+ for context in contexts:
+ ci = r[context]
+ del self.cspo[ci][si][pi][oi]
+ del self.cpos[ci][pi][oi][si]
+ del self.cosp[ci][oi][si][pi]
+
+ self._removeNestedIndex(self.spo, si, pi, oi, ci)
+ self._removeNestedIndex(self.pos, pi, oi, si, ci)
+ self._removeNestedIndex(self.osp, oi, si, pi, ci)
+ # grr!! hafta ref-count these before you can collect them dumbass!
+ #del f[si], f[pi], f[oi]
+ #del r[subject], r[predicate], r[object]
+ else:
+ subject, predicate, object = triple
+ ci = r.get(context, None)
+ if ci:
+ for triple, cg in self.triples(triple, context):
+ si, pi, oi = self.identifierToInt(triple)
+ del self.cspo[ci][si][pi][oi]
+ del self.cpos[ci][pi][oi][si]
+ del self.cosp[ci][oi][si][pi]
+
+ try:
+ self._removeNestedIndex(self.spo, si, pi, oi, ci)
+ self._removeNestedIndex(self.pos, pi, oi, si, ci)
+ self._removeNestedIndex(self.osp, oi, si, pi, ci)
+ except KeyError:
+ # the context may be a quoted one in which
+ # there will not be a triple in spo, pos or
+ # osp. So ignore any KeyErrors
+ pass
+ # TODO delete references to resources in self.forward/self.reverse
+ # that are not in use anymore...
+
+ if subject is None and predicate is None and object is None:
+ # remove context
+ try:
+ ci = self.reverse[context]
+ del self.cspo[ci], self.cpos[ci], self.cosp[ci]
+ except KeyError:
+ # TODO: no exception when removing non-existant context?
+ pass
+
+
+ def triples(self, triple, context=None):
+ """A generator over all the triples matching """
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ subject, predicate, object = triple
+ ci = si = pi = oi = Any
+
+ if context is None:
+ spo = self.spo
+ pos = self.pos
+ osp = self.osp
+ else:
+ try:
+ ci = self.reverse[context] # TODO: Really ignore keyerror here
+ spo = self.cspo[ci]
+ pos = self.cpos[ci]
+ osp = self.cosp[ci]
+ except KeyError:
+ return
+ try:
+ if subject is not Any:
+ si = self.reverse[subject] # throws keyerror if subject doesn't exist ;(
+ if predicate is not Any:
+ pi = self.reverse[predicate]
+ if object is not Any:
+ oi = self.reverse[object]
+ except KeyError, e:
+ return #raise StopIteration
+
+ if si != Any: # subject is given
+ if spo.has_key(si):
+ subjectDictionary = spo[si]
+ if pi != Any: # subject+predicate is given
+ if subjectDictionary.has_key(pi):
+ if oi!= Any: # subject+predicate+object is given
+ if subjectDictionary[pi].has_key(oi):
+ ss, pp, oo = self.intToIdentifier((si, pi, oi))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # given object not found
+ pass
+ else: # subject+predicate is given, object unbound
+ for o in subjectDictionary[pi].keys():
+ ss, pp, oo = self.intToIdentifier((si, pi, o))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # given predicate not found
+ pass
+ else: # subject given, predicate unbound
+ for p in subjectDictionary.keys():
+ if oi != Any: # object is given
+ if subjectDictionary[p].has_key(oi):
+ ss, pp, oo = self.intToIdentifier((si, p, oi))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # given object not found
+ pass
+ else: # object unbound
+ for o in subjectDictionary[p].keys():
+ ss, pp, oo = self.intToIdentifier((si, p, o))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # given subject not found
+ pass
+ elif pi != Any: # predicate is given, subject unbound
+ if pos.has_key(pi):
+ predicateDictionary = pos[pi]
+ if oi != Any: # predicate+object is given, subject unbound
+ if predicateDictionary.has_key(oi):
+ for s in predicateDictionary[oi].keys():
+ ss, pp, oo = self.intToIdentifier((s, pi, oi))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # given object not found
+ pass
+ else: # predicate is given, object+subject unbound
+ for o in predicateDictionary.keys():
+ for s in predicateDictionary[o].keys():
+ ss, pp, oo = self.intToIdentifier((s, pi, o))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ elif oi != Any: # object is given, subject+predicate unbound
+ if osp.has_key(oi):
+ objectDictionary = osp[oi]
+ for s in objectDictionary.keys():
+ for p in objectDictionary[s].keys():
+ ss, pp, oo = self.intToIdentifier((s, p, oi))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+ else: # subject+predicate+object unbound
+ for s in spo.keys():
+ subjectDictionary = spo[s]
+ for p in subjectDictionary.keys():
+ for o in subjectDictionary[p].keys():
+ ss, pp, oo = self.intToIdentifier((s, p, o))
+ yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
+
+ def __len__(self, context=None):
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ # TODO: for eff. implementation
+ count = 0
+ for triple, cg in self.triples((Any, Any, Any), context):
+ count += 1
+ return count
+
+ def contexts(self, triple=None):
+ if triple:
+ si, pi, oi = self.identifierToInt(triple)
+ for ci in self.spo[si][pi][oi]:
+ yield self.forward[ci]
+ else:
+ for ci in self.cspo.keys():
+ yield self.forward[ci]
+
+
+
+
+import random
+
+def randid(randint=random.randint, choice=random.choice, signs=(-1,1)):
+ return choice(signs)*randint(1,2000000000)
+
+del random
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py
new file mode 100644
index 0000000..8062daa
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py
@@ -0,0 +1,3 @@
+"""
+
+"""
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py
new file mode 100644
index 0000000..32da08e
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py
@@ -0,0 +1,2314 @@
+#!/usr/bin/env python
+u"""
+notation3.py - Standalone Notation3 Parser
+Derived from CWM, the Closed World Machine
+
+Authors of the original suite:
+
+* Dan Connolly <@@>
+* Tim Berners-Lee <@@>
+* Yosi Scharf <@@>
+* Joseph M. Reagle Jr. <reagle@w3.org>
+* Rich Salz <rsalz@zolera.com>
+
+http://www.w3.org/2000/10/swap/notation3.py
+
+Copyright 2000-2007, World Wide Web Consortium.
+Copyright 2001, MIT.
+Copyright 2001, Zolera Systems Inc.
+
+License: W3C Software License
+http://www.w3.org/Consortium/Legal/copyright-software
+
+Modified by Sean B. Palmer
+Copyright 2007, Sean B. Palmer. \u32E1
+
+Modified to work with rdflib_ by Gunnar Aastrand Grimnes
+Copyright 2010, Gunnar A. Grimnes
+
+"""
+
+# Python standard libraries
+import types
+import sys
+import os
+import string
+import re
+import time
+import StringIO
+import codecs
+
+from binascii import a2b_hex
+from decimal import Decimal
+
+from rdflib_.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
+from rdflib_.graph import QuotedGraph, ConjunctiveGraph
+from rdflib_ import py3compat
+b = py3compat.b
+
+__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"]
+
+from rdflib_.parser import Parser
+
+# Incestuous.. would be nice to separate N3 and XML
+# from sax2rdf import XMLtoDOM
+def XMLtoDOM(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+
+# SWAP http://www.w3.org/2000/10/swap
+# from diag import verbosity, setVerbosity, progress
+def verbosity(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+def setVerbosity(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+def progress(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+
+
+
+def splitFrag(uriref):
+ """split a URI reference between the fragment and the rest.
+
+ Punctuation is thrown away.
+
+ e.g.
+
+ >>> splitFrag("abc#def")
+ ('abc', 'def')
+
+ >>> splitFrag("abcdef")
+ ('abcdef', None)
+
+ """
+
+ i = uriref.rfind("#")
+ if i>= 0: return uriref[:i], uriref[i+1:]
+ else: return uriref, None
+
+def splitFragP(uriref, punct=0):
+ """split a URI reference before the fragment
+
+ Punctuation is kept.
+
+ e.g.
+
+ >>> splitFragP("abc#def")
+ ('abc', '#def')
+
+ >>> splitFragP("abcdef")
+ ('abcdef', '')
+
+ """
+
+ i = uriref.rfind("#")
+ if i>= 0: return uriref[:i], uriref[i:]
+ else: return uriref, ''
+
+@py3compat.format_doctest_out
+def join(here, there):
+ """join an absolute URI and URI reference
+ (non-ascii characters are supported/doctested;
+ haven't checked the details of the IRI spec though)
+
+ here is assumed to be absolute.
+ there is URI reference.
+
+ >>> join('http://example/x/y/z', '../abc')
+ 'http://example/x/abc'
+
+ Raise ValueError if there uses relative path
+ syntax but here has no hierarchical path.
+
+ >>> join('mid:foo@example', '../foo')
+ Traceback (most recent call last):
+ raise ValueError, here
+ ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.
+
+ >>> join('http://example/x/y/z', '')
+ 'http://example/x/y/z'
+
+ >>> join('mid:foo@example', '#foo')
+ 'mid:foo@example#foo'
+
+ We grok IRIs
+
+ >>> len(u'Andr\\xe9')
+ 5
+
+ >>> join('http://example.org/', u'#Andr\\xe9')
+ %(u)s'http://example.org/#Andr\\xe9'
+ """
+
+ assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?)
+
+ slashl = there.find('/')
+ colonl = there.find(':')
+
+ # join(base, 'foo:/') -- absolute
+ if colonl >= 0 and (slashl < 0 or colonl < slashl):
+ return there
+
+ bcolonl = here.find(':')
+ assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute
+
+ path, frag = splitFragP(there)
+ if not path: return here + frag
+
+ # join('mid:foo@example', '../foo') bzzt
+ if here[bcolonl+1:bcolonl+2] <> '/':
+ raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." %(here, there))
+
+ if here[bcolonl+1:bcolonl+3] == '//':
+ bpath = here.find('/', bcolonl+3)
+ else:
+ bpath = bcolonl+1
+
+ # join('http://xyz', 'foo')
+ if bpath < 0:
+ bpath = len(here)
+ here = here + '/'
+
+ # join('http://xyz/', '//abc') => 'http://abc'
+ if there[:2] == '//':
+ return here[:bcolonl+1] + there
+
+ # join('http://xyz/', '/abc') => 'http://xyz/abc'
+ if there[:1] == '/':
+ return here[:bpath] + there
+
+ slashr = here.rfind('/')
+
+ while 1:
+ if path[:2] == './':
+ path = path[2:]
+ if path == '.':
+ path = ''
+ elif path[:3] == '../' or path == '..':
+ path = path[3:]
+ i = here.rfind('/', bpath, slashr)
+ if i >= 0:
+ here = here[:i+1]
+ slashr = i
+ else:
+ break
+
+ return here[:slashr+1] + path + frag
+
+commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')
+
+def refTo(base, uri):
+ """figure out a relative URI reference from base to uri
+
+ >>> refTo('http://example/x/y/z', 'http://example/x/abc')
+ '../abc'
+
+ >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
+ 'q/r#s'
+
+ >>> refTo(None, 'http://ex/x/y')
+ 'http://ex/x/y'
+
+ >>> refTo('http://ex/x/y', 'http://ex/x/y')
+ ''
+
+ Note the relationship between refTo and join:
+ join(x, refTo(x, y)) == y
+ which points out certain strings which cannot be URIs. e.g.
+ >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
+ 0
+
+ So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
+ >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
+ 1
+
+ This one checks that it uses a root-realtive one where that is
+ all they share. Now uses root-relative where no path is shared.
+ This is a matter of taste but tends to give more resilience IMHO
+ -- and shorter paths
+
+ Note that base may be None, meaning no base. In some situations, there
+ just ain't a base. Slife. In these cases, relTo returns the absolute value.
+ The axiom abs(,rel(b,x))=x still holds.
+ This saves people having to set the base to "bogus:".
+
+ >>> refTo('http://ex/x/y/z', 'http://ex/r')
+ '/r'
+
+ """
+
+# assert base # don't mask bugs -danc # not a bug. -tim
+ if not base: return uri
+ if base == uri: return ""
+
+ # Find how many path segments in common
+ i=0
+ while i<len(uri) and i<len(base):
+ if uri[i] == base[i]: i = i + 1
+ else: break
+ # print "# relative", base, uri, " same up to ", i
+ # i point to end of shortest one or first difference
+
+ m = commonHost.match(base[:i])
+ if m:
+ k=uri.find("//")
+ if k<0: k=-2 # no host
+ l=uri.find("/", k+2)
+ if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]:
+ return uri[l:]
+
+ if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base
+
+ while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash
+
+ if i < 3: return uri # No way.
+ if base.find("//", i-2)>0 \
+ or uri.find("//", i-2)>0: return uri # An unshared "//"
+ if base.find(":", i)>0: return uri # An unshared ":"
+ n = base.count("/", i)
+ if n == 0 and i<len(uri) and uri[i] == '#':
+ return "./" + uri[i:]
+ elif n == 0 and i == len(uri):
+ return "./"
+ else:
+ return ("../" * n) + uri[i:]
+
+
+def base():
+ """The base URI for this process - the Web equiv of cwd
+
+ Relative or abolute unix-standard filenames parsed relative to
+ this yeild the URI of the file.
+ If we had a reliable way of getting a computer name,
+ we should put it in the hostname just to prevent ambiguity
+
+ """
+# return "file://" + hostname + os.getcwd() + "/"
+ return "file://" + _fixslash(os.getcwd()) + "/"
+
+
+def _fixslash(str):
+ """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
+ s = str
+ for i in range(len(s)):
+ if s[i] == "\\": s = s[:i] + "/" + s[i+1:]
+ if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present
+ return s
+
+URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
+ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
+@py3compat.format_doctest_out
+def canonical(str_in):
+ """Convert equivalent URIs (or parts) to the same string
+
+ There are many differenet levels of URI canonicalization
+ which are possible. See http://www.ietf.org/rfc/rfc3986.txt
+ Done:
+ - Converfting unicode IRI to utf-8
+ - Escaping all non-ASCII
+ - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
+ hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
+ - Making all escapes uppercase hexadecimal
+
+ Not done:
+ - Making URI scheme lowercase
+ - changing /./ or /foo/../ to / with care not to change host part
+
+
+ >>> canonical("foo bar")
+ %(b)s'foo%%20bar'
+
+ >>> canonical(u'http:')
+ %(b)s'http:'
+
+ >>> canonical('fran%%c3%%83%%c2%%a7ois')
+ %(b)s'fran%%C3%%83%%C2%%A7ois'
+
+ >>> canonical('a')
+ %(b)s'a'
+
+ >>> canonical('%%4e')
+ %(b)s'N'
+
+ >>> canonical('%%9d')
+ %(b)s'%%9D'
+
+ >>> canonical('%%2f')
+ %(b)s'%%2F'
+
+ >>> canonical('%%2F')
+ %(b)s'%%2F'
+
+ """
+ if type(str_in) == type(u''):
+ s8 = str_in.encode('utf-8')
+ else:
+ s8 = str_in
+ s = b('')
+ i = 0
+ while i < len(s8):
+ if py3compat.PY3:
+ n = s8[i]; ch = bytes([n])
+ else:
+ ch = s8[i]; n = ord(ch)
+ if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
+ s += b("%%%02X" % ord(ch))
+ elif ch == b('%') and i+2 < len(s8):
+ ch2 = a2b_hex(s8[i+1:i+3])
+ if ch2 in URI_unreserved: s += ch2
+ else: s += b("%%%02X" % ord(ch2))
+ i = i+3
+ continue
+ else:
+ s += ch
+ i = i +1
+ return s
+
+
+
+
+
+
+CONTEXT = 0
+PRED = 1
+SUBJ = 2
+OBJ = 3
+
+PARTS = PRED, SUBJ, OBJ
+ALL4 = CONTEXT, PRED, SUBJ, OBJ
+
+SYMBOL = 0
+FORMULA = 1
+LITERAL = 2
+LITERAL_DT = 21
+LITERAL_LANG = 22
+ANONYMOUS = 3
+XMLLITERAL = 25
+
+Logic_NS = "http://www.w3.org/2000/10/swap/log#"
+NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging
+forSomeSym = Logic_NS + "forSome"
+forAllSym = Logic_NS + "forAll"
+
+RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+OWL_NS = "http://www.w3.org/2002/07/owl#"
+DAML_sameAs_URI = OWL_NS+"sameAs"
+parsesTo_URI = Logic_NS + "parsesTo"
+RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"
+
+List_NS = RDF_NS_URI # From 20030808
+_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"
+
+N3_first = (SYMBOL, List_NS + "first")
+N3_rest = (SYMBOL, List_NS + "rest")
+N3_li = (SYMBOL, List_NS + "li")
+N3_nil = (SYMBOL, List_NS + "nil")
+N3_List = (SYMBOL, List_NS + "List")
+N3_Empty = (SYMBOL, List_NS + "Empty")
+
+
+
+runNamespaceValue = None
+
+def runNamespace():
+ "Return a URI suitable as a namespace for run-local objects"
+ # @@@ include hostname (privacy?) (hash it?)
+ global runNamespaceValue
+ if runNamespaceValue == None:
+ runNamespaceValue = join(base(), _unique_id()) + '#'
+ return runNamespaceValue
+
+nextu = 0
+def uniqueURI():
+ "A unique URI"
+ global nextu
+ nextu += 1
+ return runNamespace() + "u_" + `nextu`
+
+class URISyntaxError(ValueError):
+ """A parameter is passed to a routine that requires a URI reference"""
+ pass
+
+
+tracking = False
+chatty_flag = 50
+
+
+from xml.dom import Node
+try:
+ from xml.ns import XMLNS
+except:
+ class XMLNS:
+ BASE = "http://www.w3.org/2000/xmlns/"
+ XML = "http://www.w3.org/XML/1998/namespace"
+
+
+_attrs = lambda E: (E.attributes and E.attributes.values()) or []
+_children = lambda E: E.childNodes or []
+_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
+_inclusive = lambda n: n.unsuppressedPrefixes == None
+
+# Does a document/PI has lesser/greater document order than the
+# first element?
+_LesserElement, _Element, _GreaterElement = range(3)
+
+def _sorter(n1,n2):
+ '''_sorter(n1,n2) -> int
+ Sorting predicate for non-NS attributes.'''
+
+ i = cmp(n1.namespaceURI, n2.namespaceURI)
+ if i: return i
+ return cmp(n1.localName, n2.localName)
+
+
+def _sorter_ns(n1,n2):
+ '''_sorter_ns((n,v),(n,v)) -> int
+ "(an empty namespace URI is lexicographically least)."'''
+
+ if n1[0] == 'xmlns': return -1
+ if n2[0] == 'xmlns': return 1
+ return cmp(n1[0], n2[0])
+
+def _utilized(n, node, other_attrs, unsuppressedPrefixes):
+ '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
+ Return true if that nodespace is utilized within the node'''
+
+ if n.startswith('xmlns:'):
+ n = n[6:]
+ elif n.startswith('xmlns'):
+ n = n[5:]
+ if (n=="" and node.prefix in ["#default", None]) or \
+ n == node.prefix or n in unsuppressedPrefixes:
+ return 1
+ for attr in other_attrs:
+ if n == attr.prefix: return 1
+ return 0
+
+#_in_subset = lambda subset, node: not subset or node in subset
+_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak
+
+class _implementation:
+ '''Implementation class for C14N. This accompanies a node during it's
+ processing and includes the parameters and processing state.'''
+
+ # Handler for each node type; populated during module instantiation.
+ handlers = {}
+
+ def __init__(self, node, write, **kw):
+ '''Create and run the implementation.'''
+ self.write = write
+ self.subset = kw.get('subset')
+ self.comments = kw.get('comments', 0)
+ self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
+ nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
+
+ # Processing state.
+ self.state = (nsdict, {'xml':''}, {}) #0422
+
+ if node.nodeType == Node.DOCUMENT_NODE:
+ self._do_document(node)
+ elif node.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ if not _inclusive(self):
+ self._do_element(node)
+ else:
+ inherited = self._inherit_context(node)
+ self._do_element(node, inherited)
+ elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ elif node.nodeType == Node.TEXT_NODE:
+ self._do_text(node)
+ else:
+ raise TypeError, str(node)
+
+
+ def _inherit_context(self, node):
+ '''_inherit_context(self, node) -> list
+ Scan ancestors of attribute and namespace context. Used only
+ for single element node canonicalization, not for subset
+ canonicalization.'''
+
+ # Collect the initial list of xml:foo attributes.
+ xmlattrs = filter(_IN_XML_NS, _attrs(node))
+
+ # Walk up and get all xml:XXX attributes we inherit.
+ inherited, parent = [], node.parentNode
+ while parent and parent.nodeType == Node.ELEMENT_NODE:
+ for a in filter(_IN_XML_NS, _attrs(parent)):
+ n = a.localName
+ if n not in xmlattrs:
+ xmlattrs.append(n)
+ inherited.append(a)
+ parent = parent.parentNode
+ return inherited
+
+
+ def _do_document(self, node):
+ '''_do_document(self, node) -> None
+ Process a document node. documentOrder holds whether the document
+ element has been encountered such that PIs/comments can be written
+ as specified.'''
+
+ self.documentOrder = _LesserElement
+ for child in node.childNodes:
+ if child.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ self._do_element(child)
+ self.documentOrder = _GreaterElement # After document element
+ elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
+ self._do_pi(child)
+ elif child.nodeType == Node.COMMENT_NODE:
+ self._do_comment(child)
+ elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ else:
+ raise TypeError, str(child)
+ handlers[Node.DOCUMENT_NODE] = _do_document
+
+
+ def _do_text(self, node):
+ '''_do_text(self, node) -> None
+ Process a text or CDATA node. Render various special characters
+ as their C14N entity representations.'''
+ if not _in_subset(self.subset, node): return
+ s = node.data.replace("&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace(">", "&gt;")
+ s = s.replace("\015", "&#xD;")
+ if s: self.write(s)
+ handlers[Node.TEXT_NODE] = _do_text
+ handlers[Node.CDATA_SECTION_NODE] = _do_text
+
+
+ def _do_pi(self, node):
+ '''_do_pi(self, node) -> None
+ Process a PI node. Render a leading or trailing #xA if the
+ document order of the PI is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<?')
+ W(node.nodeName)
+ s = node.data
+ if s:
+ W(' ')
+ W(s)
+ W('?>')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
+
+
+ def _do_comment(self, node):
+ '''_do_comment(self, node) -> None
+ Process a comment node. Render a leading or trailing #xA if the
+ document order of the comment is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ if self.comments:
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<!--')
+ W(node.data)
+ W('-->')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.COMMENT_NODE] = _do_comment
+
+
+ def _do_attr(self, n, value):
+ ''''_do_attr(self, node) -> None
+ Process an attribute.'''
+
+ W = self.write
+ W(' ')
+ W(n)
+ W('="')
+ s = value.replace(value, "&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace('"', '&quot;')
+ s = s.replace('\011', '&#x9')
+ s = s.replace('\012', '&#xA')
+ s = s.replace('\015', '&#xD')
+ W(s)
+ W('"')
+
+
+ def _do_element(self, node, initial_other_attrs = []):
+ '''_do_element(self, node, initial_other_attrs = []) -> None
+ Process an element (and its children).'''
+
+ # Get state (from the stack) make local copies.
+ # ns_parent -- NS declarations in parent
+ # ns_rendered -- NS nodes rendered by ancestors
+ # ns_local -- NS declarations relevant to this element
+ # xml_attrs -- Attributes in XML namespace from parent
+ # xml_attrs_local -- Local attributes in XML namespace.
+ ns_parent, ns_rendered, xml_attrs = \
+ self.state[0], self.state[1].copy(), self.state[2].copy() #0422
+ ns_local = ns_parent.copy()
+ xml_attrs_local = {}
+
+ # progress("_do_element node.nodeName=", node.nodeName)
+ # progress("_do_element node.namespaceURI", node.namespaceURI)
+ # progress("_do_element node.tocml()", node.toxml())
+ # Divide attributes into NS, XML, and others.
+ other_attrs = initial_other_attrs[:]
+ in_subset = _in_subset(self.subset, node)
+ for a in _attrs(node):
+ # progress("\t_do_element a.nodeName=", a.nodeName)
+ if a.namespaceURI == XMLNS.BASE:
+ n = a.nodeName
+ if n == "xmlns:": n = "xmlns" # DOM bug workaround
+ ns_local[n] = a.nodeValue
+ elif a.namespaceURI == XMLNS.XML:
+ if _inclusive(self) or in_subset:
+ xml_attrs_local[a.nodeName] = a #0426
+ else:
+ other_attrs.append(a)
+ #add local xml:foo attributes to ancestor's xml:foo attributes
+ xml_attrs.update(xml_attrs_local)
+
+ # Render the node
+ W, name = self.write, None
+ if in_subset:
+ name = node.nodeName
+ W('<')
+ W(name)
+
+ # Create list of NS attributes to render.
+ ns_to_render = []
+ for n,v in ns_local.items():
+
+ # If default namespace is XMLNS.BASE or empty,
+ # and if an ancestor was the same
+ if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
+ and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
+ continue
+
+ # "omit namespace node with local name xml, which defines
+ # the xml prefix, if its string value is
+ # http://www.w3.org/XML/1998/namespace."
+ if n in ["xmlns:xml", "xml"] \
+ and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
+ continue
+
+
+ # If not previously rendered
+ # and it's inclusive or utilized
+ if (n,v) not in ns_rendered.items() \
+ and (_inclusive(self) or \
+ _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
+ ns_to_render.append((n, v))
+
+ # Sort and render the ns, marking what was rendered.
+ ns_to_render.sort(_sorter_ns)
+ for n,v in ns_to_render:
+ self._do_attr(n, v)
+ ns_rendered[n]=v #0417
+
+ # If exclusive or the parent is in the subset, add the local xml attributes
+ # Else, add all local and ancestor xml attributes
+ # Sort and render the attributes.
+ if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426
+ other_attrs.extend(xml_attrs_local.values())
+ else:
+ other_attrs.extend(xml_attrs.values())
+ other_attrs.sort(_sorter)
+ for a in other_attrs:
+ self._do_attr(a.nodeName, a.value)
+ W('>')
+
+ # Push state, recurse, pop state.
+ state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
+ for c in _children(node):
+ _implementation.handlers[c.nodeType](self, c)
+ self.state = state
+
+ if name: W('</%s>' % name)
+ handlers[Node.ELEMENT_NODE] = _do_element
+
+
+def Canonicalize(node, output=None, **kw):
+ '''Canonicalize(node, output=None, **kw) -> UTF-8
+
+ Canonicalize a DOM document/element node and all descendents.
+ Return the text; if output is specified then output.write will
+ be called to output the text and None will be returned
+ Keyword parameters:
+ nsdict -- a dictionary of prefix:uri namespace entries
+ assumed to exist in the surrounding context
+ comments -- keep comments if non-zero (default is 0)
+ subset -- Canonical XML subsetting resulting from XPath (default is [])
+ unsuppressedPrefixes -- do exclusive C14N, and this specifies the
+ prefixes that should be inherited.
+ '''
+ if output:
+ apply(_implementation, (node, output.write), kw)
+ else:
+ s = StringIO.StringIO()
+ apply(_implementation, (node, s.write), kw)
+ return s.getvalue()
+
+# end of xmlC14n.py
+
+# from why import BecauseOfData, becauseSubexpression
+def BecauseOfData(*args, **kargs):
+ # print args, kargs
+ pass
+def becauseSubexpression(*args, **kargs):
+ # print args, kargs
+ pass
+
+N3_forSome_URI = forSomeSym
+N3_forAll_URI = forAllSym
+
+# Magic resources we know about
+
+
+
+ADDED_HASH = "#" # Stop where we use this in case we want to remove it!
+# This is the hash on namespace URIs
+
+RDF_type = ( SYMBOL , RDF_type_URI )
+DAML_sameAs = ( SYMBOL, DAML_sameAs_URI )
+
+LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"
+
+BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
+DECIMAL_DATATYPE = _XSD_PFX + "decimal"
+DOUBLE_DATATYPE = _XSD_PFX + "double"
+FLOAT_DATATYPE = _XSD_PFX + "float"
+INTEGER_DATATYPE = _XSD_PFX + "integer"
+
+option_noregen = 0 # If set, do not regenerate genids on output
+
+# @@ I18n - the notname chars need extending for well known unicode non-text
+# characters. The XML spec switched to assuming unknown things were name
+# characaters.
+# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
+_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/
+_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/
+_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+
+
+N3CommentCharacter = "#" # For unix script #! compatabilty
+
+########################################## Parse string to sink
+#
+# Regular expressions:
+eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment
+eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. w/comment
+ws = re.compile(r'[ \t]*') # Whitespace not including NL
+signed_integer = re.compile(r'[-+]?[0-9]+') # integer
+number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?')
+digitstring = re.compile(r'[0-9]+') # Unsigned integer
+interesting = re.compile(r'[\\\r\n\"]')
+langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')
+#"
+
+
+
+class SinkParser:
+ def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
+ genPrefix = "", flags="",
+ why=None):
+ """ note: namespace names should *not* end in #;
+ the # will get added during qname processing """
+
+ self._bindings = {}
+ self._flags = flags
+ if thisDoc != "":
+ assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
+ self._bindings[""] = thisDoc + "#" # default
+
+ self._store = store
+ if genPrefix: store.setGenPrefix(genPrefix) # pass it on
+
+ self._thisDoc = thisDoc
+ self.lines = 0 # for error handling
+ self.startOfLine = 0 # For calculating character number
+ self._genPrefix = genPrefix
+ self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ]
+ self.keywordsSet = 0 # Then only can others be considerd qnames
+ self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term
+ self._variables = {}
+ self._parentVariables = {}
+ self._reason = why # Why the parser was asked to parse this
+
+ self._reason2 = None # Why these triples
+ # was: diag.tracking
+ if tracking: self._reason2 = BecauseOfData(
+ store.newSymbol(thisDoc), because=self._reason)
+
+ if baseURI: self._baseURI = baseURI
+ else:
+ if thisDoc:
+ self._baseURI = thisDoc
+ else:
+ self._baseURI = None
+
+ assert not self._baseURI or ':' in self._baseURI
+
+ if not self._genPrefix:
+ if self._thisDoc: self._genPrefix = self._thisDoc + "#_g"
+ else: self._genPrefix = uniqueURI()
+
+ if openFormula ==None:
+ if self._thisDoc:
+ self._formula = store.newFormula(thisDoc + "#_formula")
+ else:
+ self._formula = store.newFormula()
+ else:
+ self._formula = openFormula
+
+
+ self._context = self._formula
+ self._parentContext = None
+
+
+ def here(self, i):
+ """String generated from position in file
+
+ This is for repeatability when refering people to bnodes in a document.
+ This has diagnostic uses less formally, as it should point one to which
+ bnode the arbitrary identifier actually is. It gives the
+ line and character number of the '[' charcacter or path character
+ which introduced the blank node. The first blank node is boringly _L1C1.
+ It used to be used only for tracking, but for tests in general
+ it makes the canonical ordering of bnodes repeatable."""
+
+ return "%s_L%iC%i" % (self._genPrefix , self.lines,
+ i - self.startOfLine + 1)
+
+ def formula(self):
+ return self._formula
+
+ def loadStream(self, stream):
+ return self.loadBuf(stream.read()) # Not ideal
+
+ def loadBuf(self, buf):
+ """Parses a buffer and returns its top level formula"""
+ self.startDoc()
+
+ self.feed(buf)
+ return self.endDoc() # self._formula
+
+
+ def feed(self, octets):
+ """Feed an octet stream tothe parser
+
+ if BadSyntax is raised, the string
+ passed in the exception object is the
+ remainder after any statements have been parsed.
+ So if there is more data to feed to the
+ parser, it should be straightforward to recover."""
+
+ if not isinstance(octets, unicode):
+ s = octets.decode('utf-8')
+ # NB already decoded, so \ufeff
+ if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
+ s = s[1:]
+ else:
+ s=octets
+
+ i = 0
+ while i >= 0:
+ j = self.skipSpace(s, i)
+ if j<0: return
+
+ i = self.directiveOrStatement(s,j)
+ if i<0:
+ print "# next char: ", `s[j]`
+ raise BadSyntax(self._thisDoc, self.lines, s, j,
+ "expected directive or statement")
+
+ def directiveOrStatement(self, str,h):
+
+ i = self.skipSpace(str, h)
+ if i<0: return i # EOF
+
+ j = self.directive(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ j = self.statement(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ return j
+
+
+ #@@I18N
+ global _notNameChars
+ #_namechars = string.lowercase + string.uppercase + string.digits + '_-'
+
+ def tok(self, tok, str, i):
+ """Check for keyword. Space must have been stripped on entry and
+ we must not be at end of file.
+
+ Returns position after the keyword, or -1 if not matched."""
+
+ assert tok[0] not in _notNameChars # not for punctuation
+ # A leading '@' always introduces a keyword; otherwise the bare word
+ # is only a keyword if declared via @keywords.
+ if str[i:i+1] == "@":
+ i = i+1
+ else:
+ if tok not in self.keywords:
+ return -1 # No, this has neither keywords declaration nor "@"
+
+ # Must match the keyword text AND be followed by a non-name character.
+ if (str[i:i+len(tok)] == tok
+ and (str[i+len(tok)] in _notQNameChars )):
+ i = i + len(tok)
+ return i
+ else:
+ return -1
+
+ def directive(self, str, i):
+ """Parse one N3 directive (@prefix, @base, @keywords, @forAll, @forSome).
+ Returns position after the directive, or -1 if not a directive."""
+ j = self.skipSpace(str, i)
+ if j<0: return j # eof
+ res = []
+
+ j = self.tok('bind', str, i) # implied "#". Obsolete.
+ if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "keyword bind is obsolete: use @prefix")
+
+ j = self.tok('keywords', str, i)
+ if j>0:
+ i = self.commaSeparatedList(str, j, res, self.bareWord)
+ if i < 0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "'@keywords' needs comma separated list of words")
+ self.setKeywords(res[:])
+ # was: diag.chatty_flag
+ if chatty_flag > 80: progress("Keywords ", self.keywords)
+ return i
+
+
+ j = self.tok('forAll', str, i)
+ if j > 0:
+ i = self.commaSeparatedList(str, j, res, self.uri_ref2)
+ if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad variable list after @forAll")
+ for x in res:
+ #self._context.declareUniversal(x)
+ # Only declare if not already a variable here or in the parent.
+ if x not in self._variables or x in self._parentVariables:
+ self._variables[x] = self._context.newUniversal(x)
+ return i
+
+ j = self.tok('forSome', str, i)
+ if j > 0:
+ i = self. commaSeparatedList(str, j, res, self.uri_ref2)
+ if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad variable list after @forSome")
+ for x in res:
+ self._context.declareExistential(x)
+ return i
+
+
+ j=self.tok('prefix', str, i) # no implied "#"
+ if j>=0:
+ t = []
+ i = self.qname(str, j, t)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected qname after @prefix")
+ j = self.uri_ref2(str, i, t)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected <uriref> after @prefix _qname_")
+ ns = self.uriOf(t[1])
+
+ # Resolve relative namespace URIs against the base URI.
+ if self._baseURI:
+ ns = join(self._baseURI, ns)
+ elif ":" not in ns:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "With no base URI, cannot use relative URI in @prefix <"+ns+">")
+ assert ':' in ns # must be absolute
+ self._bindings[t[0][0]] = ns
+ self.bind(t[0][0], hexify(ns))
+ return j
+
+ j=self.tok('base', str, i) # Added 2007/7/7
+ if j >= 0:
+ t = []
+ i = self.uri_ref2(str, j, t)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected <uri> after @base ")
+ ns = self.uriOf(t[0])
+
+ if self._baseURI:
+ ns = join(self._baseURI, ns)
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "With no previous base URI, cannot use relative URI in @base <"+ns+">")
+ assert ':' in ns # must be absolute
+ self._baseURI = ns
+ return i
+
+ return -1 # Not a directive, could be something else.
+
+ def bind(self, qn, uri):
+ """Bind prefix qn to namespace uri in the store (empty qn = default ns)."""
+ assert isinstance(uri,
+ types.StringType), "Any unicode must be %x-encoded already"
+ if qn == "":
+ self._store.setDefaultNamespace(uri)
+ else:
+ self._store.bind(qn, uri)
+
+ def setKeywords(self, k):
+ "Takes a list of strings; None resets to the no-@keywords-declared state"
+ if k == None:
+ self.keywordsSet = 0
+ else:
+ self.keywords = k
+ self.keywordsSet = 1
+
+
+ def startDoc(self):
+ """Tell the sink that parsing of the document is starting."""
+ # was: self._store.startDoc()
+ self._store.startDoc(self._formula)
+
+ def endDoc(self):
+ """Signal end of document and stop parsing. returns formula"""
+ self._store.endDoc(self._formula) # don't canonicalize yet
+ return self._formula
+
+ def makeStatement(self, quadruple):
+ """Forward a (context, predicate, subject, object) quad to the sink."""
+ #$$$$$$$$$$$$$$$$$$$$$
+# print "# Parser output: ", `quadruple`
+ self._store.makeStatement(quadruple, why=self._reason2)
+
+
+
+ def statement(self, str, i):
+ """Parse one statement: a subject followed by a property list.
+ Returns new position or <0 on failure before the subject."""
+ r = []
+
+ i = self.object(str, i, r) # Allow literal for subject - extends RDF
+ if i<0: return i
+
+ j = self.property_list(str, i, r[0])
+
+ if j<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected propertylist")
+ return j
+
+ def subject(self, str, i, res):
+ """Parse a subject; same grammar as a generic item."""
+ return self.item(str, i, res)
+
+ def verb(self, str, i, res):
+ """ has _prop_
+ is _prop_ of
+ a
+ =
+ _prop_
+ >- prop ->
+ <- prop -<
+ _operator_
+
+ Appends a ('->' or '<-', predicate) pair to res; '<-' means the
+ subject/object roles are swapped when the statement is emitted."""
+
+ j = self.skipSpace(str, i)
+ if j<0:return j # eof
+
+ r = []
+
+ j = self.tok('has', str, i)
+ if j>=0:
+ i = self.prop(str, j, r)
+ if i < 0: raise BadSyntax(self._thisDoc, self.lines,
+ str, j, "expected property after 'has'")
+ res.append(('->', r[0]))
+ return i
+
+ j = self.tok('is', str, i)
+ if j>=0:
+ i = self.prop(str, j, r)
+ if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected <property> after 'is'")
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "End of file found, expected property after 'is'")
+ return j # eof
+ i=j
+ j = self.tok('of', str, i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected 'of' after 'is' <prop>")
+ res.append(('<-', r[0]))
+ return j
+
+ j = self.tok('a', str, i)
+ if j>=0:
+ res.append(('->', RDF_type))
+ return j
+
+
+ if str[i:i+2] == "<=":
+ res.append(('<-', self._store.newSymbol(Logic_NS+"implies")))
+ return i+2
+
+ if str[i:i+1] == "=":
+ if str[i+1:i+2] == ">":
+ res.append(('->', self._store.newSymbol(Logic_NS+"implies")))
+ return i+2
+ res.append(('->', DAML_sameAs))
+ return i+1
+
+ if str[i:i+2] == ":=":
+ # patch file relates two formulae, uses this @@ really?
+ res.append(('->', Logic_NS+"becomes"))
+ return i+2
+
+ j = self.prop(str, i, r)
+ if j >= 0:
+ res.append(('->', r[0]))
+ return j
+
+ if str[i:i+2] == ">-" or str[i:i+2] == "<-":
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ ">- ... -> syntax is obsolete.")
+
+ return -1
+
+ def prop(self, str, i, res):
+ """Parse a property; same grammar as a generic item."""
+ return self.item(str, i, res)
+
+ def item(self, str, i, res):
+ """Parse an item; delegates to the path production."""
+ return self.path(str, i, res)
+
+ def blankNode(self, uri=None):
+ """Create a blank node; with the 'B' flag, use a symbol declared
+ existential instead of a true blank node."""
+ if "B" not in self._flags:
+ return self._context.newBlankNode(uri, why=self._reason2)
+ x = self._context.newSymbol(uri)
+ self._context.declareExistential(x)
+ return x
+
+ def path(self, str, i, res):
+ """Parse the path production: a node optionally followed by
+ '!' (forward), '^' (reverse) or '.' path steps, each generating
+ an intermediate blank node.
+ """
+ j = self.nodeOrLiteral(str, i, res)
+ if j<0: return j # nope
+
+ while str[j:j+1] in "!^.": # no spaces, must follow exactly (?)
+ ch = str[j:j+1] # @@ Allow "." followed IMMEDIATELY by a node.
+ if ch == ".":
+ ahead = str[j+1:j+2]
+ # A '.' only continues the path when followed by a name start.
+ if not ahead or (ahead in _notNameChars
+ and ahead not in ":?<[{("): break
+ subj = res.pop()
+ obj = self.blankNode(uri=self.here(j))
+ j = self.node(str, j+1, res)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found in middle of path syntax")
+ pred = res.pop()
+ if ch == "^": # Reverse traverse
+ self.makeStatement((self._context, pred, obj, subj))
+ else:
+ self.makeStatement((self._context, pred, subj, obj))
+ res.append(obj)
+ return j
+
+ def anonymousNode(self, ln):
+ """Remember or generate a term for one of these _: anonymous nodes"""
+ term = self._anonymousNodes.get(ln, None)
+ if term != None: return term
+ term = self._store.newBlankNode(self._context, why=self._reason2)
+ self._anonymousNodes[ln] = term
+ return term
+
+ def node(self, str, i, res, subjectAlready=None):
+ """Parse the <node> production.
+ Space is now skipped once at the beginning
+ instead of in multiple calls to self.skipSpace().
+
+ Handles: [ ... ] anon nodes, { ... } formulae, {$ ... $} and
+ ( ... ) lists/sets, 'this', booleans, and named nodes.
+ """
+ subj = subjectAlready
+
+ j = self.skipSpace(str,i)
+ if j<0: return j #eof
+ i=j
+ ch = str[i:i+1] # Quick 1-character checks first:
+
+ if ch == "[":
+ bnodeID = self.here(i)
+ j=self.skipSpace(str,i+1)
+ if j<0: raise BadSyntax(self._thisDoc,
+ self.lines, str, i, "EOF after '['")
+ if str[j:j+1] == "=": # Hack for "is" binding name to anon node
+ i = j+1
+ objs = []
+ j = self.objectList(str, i, objs);
+ if j>=0:
+ subj = objs[0]
+ if len(objs)>1:
+ for obj in objs:
+ self.makeStatement((self._context,
+ DAML_sameAs, subj, obj))
+ j = self.skipSpace(str, j)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF when objectList expected after [ = ")
+ if str[j:j+1] == ";":
+ j=j+1
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "objectList expected after [= ")
+
+ if subj is None:
+ subj=self.blankNode(uri= bnodeID)
+
+ i = self.property_list(str, j, subj)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "property_list expected")
+
+ j = self.skipSpace(str, i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF when ']' expected after [ <propertyList>")
+ if str[j:j+1] != "]":
+ raise BadSyntax(self._thisDoc,
+ self.lines, str, j, "']' expected")
+ res.append(subj)
+ return j+1
+
+ if ch == "{":
+ ch2 = str[i+1:i+2]
+ if ch2 == '$': # {$ ... $} is a set
+ i += 1
+ j = i + 1
+ List = []
+ first_run = True
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "needed '$}', found end.")
+ if str[i:i+2] == '$}':
+ j = i+2
+ break
+
+ if not first_run:
+ if str[i:i+1] == ',':
+ i+=1
+ else:
+ raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected: ','")
+ else: first_run = False
+
+ item = []
+ j = self.item(str,i, item) #@@@@@ should be path, was object
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected item in set or '$}'")
+ List.append(self._store.intern(item[0]))
+ res.append(self._store.newSet(List, self._context))
+ return j
+ else:
+ # { ... } is a nested formula; save and swap in parser state,
+ # parse statements into the new formula, then restore.
+ j=i+1
+ oldParentContext = self._parentContext
+ self._parentContext = self._context
+ parentAnonymousNodes = self._anonymousNodes
+ grandParentVariables = self._parentVariables
+ self._parentVariables = self._variables
+ self._anonymousNodes = {}
+ self._variables = self._variables.copy()
+ reason2 = self._reason2
+ self._reason2 = becauseSubexpression
+ if subj is None: subj = self._store.newFormula()
+ self._context = subj
+
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "needed '}', found end.")
+
+ if str[i:i+1] == "}":
+ j = i+1
+ break
+
+ j = self.directiveOrStatement(str,i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected statement or '}'")
+
+ self._anonymousNodes = parentAnonymousNodes
+ self._variables = self._parentVariables
+ self._parentVariables = grandParentVariables
+ self._context = self._parentContext
+ self._reason2 = reason2
+ self._parentContext = oldParentContext
+ res.append(subj.close()) # No use until closed
+ return j
+
+ if ch == "(":
+ thing_type = self._store.newList
+ ch2 = str[i+1:i+2]
+ if ch2 == '$': # ($ ... $) is a set, ( ... ) a list
+ thing_type = self._store.newSet
+ i += 1
+ j=i+1
+
+ List = []
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "needed ')', found end.")
+ if str[i:i+1] == ')':
+ j = i+1
+ break
+
+ item = []
+ j = self.item(str,i, item) #@@@@@ should be path, was object
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected item in list or ')'")
+ List.append(self._store.intern(item[0]))
+ res.append(thing_type(List, self._context))
+ return j
+
+ j = self.tok('this', str, i) # This context
+ if j>=0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
+ res.append(self._context)
+ return j
+
+ #booleans
+ j = self.tok('true', str, i)
+ if j>=0:
+ res.append(True)
+ return j
+ j = self.tok('false', str, i)
+ if j>=0:
+ res.append(False)
+ return j
+
+ if subj is None: # If this can be a named node, then check for a name.
+ j = self.uri_ref2(str, i, res)
+ if j >= 0:
+ return j
+
+ return -1
+
+ def property_list(self, str, i, subj):
+ """Parse property list
+ Leaves the terminating punctuation in the buffer
+
+ Emits one statement per object, honouring verb direction.
+ """
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF found when expected verb in property list")
+ # NOTE(review): the following return is unreachable after the raise.
+ return j #eof
+
+ if str[j:j+2] ==":-":
+ i = j + 2
+ res = []
+ j = self.node(str, i, res, subj)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad {} or () or [] node after :- ")
+ i=j
+ continue
+ i=j
+ v = []
+ j = self.verb(str, i, v)
+ if j<=0:
+ return i # void but valid
+
+ objs = []
+ i = self.objectList(str, j, objs)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "objectList expected")
+ for obj in objs:
+ dir, sym = v[0]
+ if dir == '->':
+ self.makeStatement((self._context, sym, subj, obj))
+ else:
+ self.makeStatement((self._context, sym, obj, subj))
+
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found in list of objects")
+ # NOTE(review): unreachable after the raise above.
+ return j #eof
+ if str[i:i+1] != ";":
+ return i
+ i = i+1 # skip semicolon and continue
+
+ def commaSeparatedList(self, str, j, res, what):
+ """return value: -1 bad syntax; >1 new position in str
+ res has things found appended
+
+ 'what' is the parser method used for each element.
+ """
+ i = self.skipSpace(str, j)
+ if i<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF found expecting comma sep list")
+ # NOTE(review): unreachable after the raise above.
+ return i
+ if str[i] == ".": return j # empty list is OK
+ i = what(str, i, res)
+ if i<0: return -1
+
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0: return j # eof
+ ch = str[j:j+1]
+ if ch != ",":
+ if ch != ".":
+ return -1
+ return j # Found but not swallowed "."
+ i = what(str, j+1, res)
+ if i<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad list content")
+ return i
+
+ def objectList(self, str, i, res):
+ """Parse a comma-separated list of objects, appending each to res.
+ Returns position of the first non-comma token, or <0 on failure."""
+ i = self.object(str, i, res)
+ if i<0: return -1
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found after object")
+ # NOTE(review): unreachable after the raise above.
+ return j #eof
+ if str[j:j+1] != ",":
+ return j # Found something else!
+ i = self.object(str, j+1, res)
+ if i<0: return i
+
+ def checkDot(self, str, i):
+ """Expect a statement terminator: swallow '.', or accept (without
+ swallowing) a closing '}' or ']'."""
+ j = self.skipSpace(str, i)
+ if j<0: return j #eof
+ if str[j:j+1] == ".":
+ return j+1 # skip
+ if str[j:j+1] == "}":
+ return j # don't skip it
+ if str[j:j+1] == "]":
+ return j
+ raise BadSyntax(self._thisDoc, self.lines,
+ str, j, "expected '.' or '}' or ']' at end of statement")
+ # NOTE(review): unreachable after the raise above.
+ return i
+
+
+ def uri_ref2(self, str, i, res):
+ """Generate uri from n3 representation.
+
+ Note that the RDF convention of directly concatenating
+ NS and local name is now used though I prefer inserting a '#'
+ to make the namespaces look more like what XML folks expect.
+
+ Accepts qnames, ?variables, <urirefs>, and bare words when
+ @keywords is in effect.
+ """
+ qn = []
+ j = self.qname(str, i, qn)
+ if j>=0:
+ pfx, ln = qn[0]
+ if pfx is None:
+ assert 0, "not used?"
+ ns = self._baseURI + ADDED_HASH
+ else:
+ try:
+ ns = self._bindings[pfx]
+ except KeyError:
+ if pfx == "_": # Magic prefix 2001/05/30, can be overridden
+ res.append(self.anonymousNode(ln))
+ return j
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Prefix \"%s:\" not bound" % (pfx))
+ symb = self._store.newSymbol(ns + ln)
+ if symb in self._variables:
+ res.append(self._variables[symb])
+ else:
+ res.append(symb) # @@@ "#" CONVENTION
+ # NOTE(review): 'not ns.find("#")' is true only when "#" is at
+ # index 0; a missing "#" returns -1 (truthy). Intent was
+ # probably ns.find("#") < 0 — confirm before changing.
+ if not ns.find("#"):progress(
+ "Warning: no # on namespace %s," % ns)
+ return j
+
+
+ i = self.skipSpace(str, i)
+ if i<0: return -1
+
+ if str[i] == "?":
+ v = []
+ j = self.variable(str,i,v)
+ if j>0: #Forget varibles as a class, only in context.
+ res.append(v[0])
+ return j
+ return -1
+
+ elif str[i]=="<":
+ i = i + 1
+ st = i
+ while i < len(str):
+ if str[i] == ">":
+ uref = str[st:i] # the join should dealt with "":
+ if self._baseURI:
+ uref = join(self._baseURI, uref) # was: uripath.join
+ else:
+ assert ":" in uref, \
+ "With no base URI, cannot deal with relative URIs"
+ if str[i-1:i]=="#" and not uref[-1:]=="#":
+ uref = uref + "#" # She meant it! Weirdness in urlparse?
+ symb = self._store.newSymbol(uref)
+ if symb in self._variables:
+ res.append(self._variables[symb])
+ else:
+ res.append(symb)
+ return i+1
+ i = i + 1
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "unterminated URI reference")
+
+ elif self.keywordsSet:
+ v = []
+ j = self.bareWord(str,i,v)
+ if j<0: return -1 #Forget varibles as a class, only in context.
+ if v[0] in self.keywords:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ 'Keyword "%s" not allowed here.' % v[0])
+ res.append(self._store.newSymbol(self._bindings[""]+v[0]))
+ return j
+ else:
+ return -1
+
+ def skipSpace(self, str, i):
+ """Skip white space, newlines and comments.
+ return -1 if EOF, else position of first non-ws character"""
+ while 1:
+ m = eol.match(str, i)
+ if m == None: break
+ self.lines = self.lines + 1
+ i = m.end() # Point to first character unmatched
+ self.startOfLine = i
+ m = ws.match(str, i)
+ if m != None:
+ i = m.end()
+ m = eof.match(str, i)
+ if m != None: return -1
+ return i
+
+ def variable(self, str, i, res):
+ """ ?abc -> variable(:abc)
+
+ Declares the variable as a universal in the parent context (or the
+ current context at the outermost level) and appends it to res.
+ """
+
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+
+ if str[j:j+1] != "?": return -1
+ j=j+1
+ i = j
+ if str[j] in "0123456789-":
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "Varible name can't start with '%s'" % str[j])
+ # NOTE(review): unreachable after the raise above.
+ return -1
+ while i <len(str) and str[i] not in _notNameChars:
+ i = i+1
+ if self._parentContext == None:
+ varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
+ if varURI not in self._variables:
+ self._variables[varURI] = self._context.newUniversal(varURI
+ , why=self._reason2)
+ res.append(self._variables[varURI])
+ return i
+ # @@ was:
+ # raise BadSyntax(self._thisDoc, self.lines, str, j,
+ # "Can't use ?xxx syntax for variable in outermost level: %s"
+ # % str[j-1:i])
+ varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
+ if varURI not in self._parentVariables:
+ self._parentVariables[varURI] = self._parentContext.newUniversal(varURI
+ , why=self._reason2)
+ res.append(self._parentVariables[varURI])
+ return i
+
+ def bareWord(self, str, i, res):
+ """ abc -> :abc
+
+ Appends the bare word string to res; returns position after it.
+ """
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+
+ if str[j] in "0123456789-" or str[j] in _notNameChars: return -1
+ i = j
+ while i <len(str) and str[i] not in _notNameChars:
+ i = i+1
+ res.append(str[j:i])
+ return i
+
+ def qname(self, str, i, res):
+ """
+ xyz:def -> ('xyz', 'def')
+ If not in keywords and keywordsSet: def -> ('', 'def')
+ :def -> ('', 'def')
+ """
+
+ i = self.skipSpace(str, i)
+ if i<0: return -1
+
+ c = str[i]
+ if c in "0123456789-+": return -1
+ if c not in _notNameChars:
+ ln = c
+ i = i + 1
+ while i < len(str):
+ c = str[i]
+ if c not in _notNameChars:
+ ln = ln + c
+ i = i + 1
+ else: break
+ else: # First character is non-alpha
+ ln = '' # Was: None - TBL (why? useful?)
+
+ if i<len(str) and str[i] == ':':
+ pfx = ln
+ i = i + 1
+ ln = ''
+ # Accumulate the local name after the colon.
+ while i < len(str):
+ c = str[i]
+ if c not in _notNameChars:
+ ln = ln + c
+ i = i + 1
+ else: break
+
+ res.append((pfx, ln))
+ return i
+
+ else: # delimiter was not ":"
+ if ln and self.keywordsSet and ln not in self.keywords:
+ res.append(('', ln))
+ return i
+ return -1
+
+ def object(self, str, i, res):
+ """Parse an object: any subject-form node, or a plain string literal."""
+ j = self.subject(str, i, res)
+ if j>= 0:
+ return j
+ else:
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+ else: i=j
+
+ if str[i]=='"':
+ # Long (triple-quoted) or short string literal.
+ if str[i:i+3] == '"""': delim = '"""'
+ else: delim = '"'
+ i = i + len(delim)
+
+ j, s = self.strconst(str, i, delim)
+
+ res.append(self._store.newLiteral(s))
+ progress("New string const ", s, j)
+ return j
+ else:
+ return -1
+
+ def nodeOrLiteral(self, str, i, res):
+ """Parse a node, or a numeric / string literal (with optional
+ @lang tag or ^^datatype), appending the result to res."""
+ j = self.node(str, i, res)
+ startline = self.lines # Remember where for error messages
+ if j>= 0:
+ return j
+ else:
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+ else: i=j
+
+ ch = str[i]
+ if ch in "-+0987654321":
+ m = number_syntax.match(str, i)
+ if m == None:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad number syntax")
+ j = m.end()
+ # Pick float / Decimal / integer based on which groups matched.
+ if m.group('exponent') != None: # includes decimal exponent
+ res.append(float(str[i:j]))
+# res.append(self._store.newLiteral(str[i:j],
+# self._store.newSymbol(FLOAT_DATATYPE)))
+ elif m.group('decimal') != None:
+ res.append(Decimal(str[i:j]))
+ else:
+ res.append(long(str[i:j]))
+# res.append(self._store.newLiteral(str[i:j],
+# self._store.newSymbol(INTEGER_DATATYPE)))
+ return j
+
+ if str[i]=='"':
+ if str[i:i+3] == '"""': delim = '"""'
+ else: delim = '"'
+ i = i + len(delim)
+
+ dt = None
+ j, s = self.strconst(str, i, delim)
+ lang = None
+ if str[j:j+1] == "@": # Language?
+ m = langcode.match(str, j+1)
+ if m == None:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "Bad language code syntax on string literal, after @")
+ i = m.end()
+ lang = str[j+1:i]
+ j = i
+ if str[j:j+2] == "^^":
+ res2 = []
+ j = self.uri_ref2(str, j+2, res2) # Read datatype URI
+ dt = res2[0]
+# if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
+ if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
+ try:
+ dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">'
+ + s
+ + '</rdf:envelope>').firstChild
+ except:
+ raise ValueError('s="%s"' % s)
+ res.append(self._store.newXMLLiteral(dom))
+ return j
+ res.append(self._store.newLiteral(s, dt, lang))
+ return j
+ else:
+ return -1
+
+ def uriOf(self, sym):
+ """Extract the URI string from a symbol, unwrapping old-style tuples."""
+ if isinstance(sym, types.TupleType):
+ return sym[1] # old system for --pipe
+ # return sym.uriref() # cwm api
+ return sym
+
+
+ def strconst(self, str, i, delim):
+ """parse an N3 string constant delimited by delim.
+ return index, val
+
+ Handles closing-quote counting for long strings, CR stripping,
+ and backslash escapes including \\u / \\U.
+ """
+
+
+ j = i
+ ustr = u"" # Empty unicode string
+ startline = self.lines # Remember where for error messages
+ while j<len(str):
+ if str[j] == '"':
+ if delim == '"': # done when delim is "
+ i = j + 1
+ return i, ustr
+ if delim == '"""': # done when delim is """ and ...
+ if str[j:j+5] == '"""""': # ... we have "" before
+ i = j + 5
+ ustr = ustr + '""'
+ return i, ustr
+ if str[j:j+4] == '""""': # ... we have " before
+ i = j + 4
+ ustr = ustr + '"'
+ return i, ustr
+ if str[j:j+3] == '"""': # ... current " is part of delim
+ i = j + 3
+ return i, ustr
+
+ # we are inside of the string and current char is "
+ j = j + 1
+ ustr = ustr + '"'
+ continue
+
+ m = interesting.search(str, j) # was str[j:].
+ # Note for pos param to work, MUST be compiled ... re bug?
+ assert m , "Quote expected in string at ^ in %s^%s" %(
+ str[j-20:j], str[j:j+20]) # we at least have to find a quote
+
+ i = m.start()
+ try:
+ ustr = ustr + str[j:i]
+ except UnicodeError:
+ err = ""
+ for c in str[j:i]:
+ err = err + (" %02x" % ord(c))
+ streason = sys.exc_info()[1].__str__()
+ raise BadSyntax(self._thisDoc, startline, str, j,
+ "Unicode error appending characters %s to string, because\n\t%s"
+ % (err, streason))
+
+# print "@@@ i = ",i, " j=",j, "m.end=", m.end()
+
+ ch = str[i]
+ if ch == '"':
+ j = i
+ continue
+ elif ch == "\r": # Strip carriage returns
+ j = i+1
+ continue
+ elif ch == "\n":
+ if delim == '"':
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "newline found in string literal")
+ self.lines = self.lines + 1
+ ustr = ustr + ch
+ j = i + 1
+ self.startOfLine = j
+
+ elif ch == "\\":
+ j = i + 1
+ ch = str[j:j+1] # Will be empty if string ends
+ if not ch:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal (2)")
+ k = 'abfrtvn\\"'.find(ch)
+ if k >= 0:
+ uch = '\a\b\f\r\t\v\n\\"'[k]
+ ustr = ustr + uch
+ j = j + 1
+ elif ch == "u":
+ j, ch = self.uEscape(str, j+1, startline)
+ ustr = ustr + ch
+ elif ch == "U":
+ j, ch = self.UEscape(str, j+1, startline)
+ ustr = ustr + ch
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad escape")
+
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "unterminated string literal")
+
+
+ def uEscape(self, str, i, startline):
+ """Read a 4-hex-digit \\u escape; return (new position, character)."""
+ j = i
+ count = 0
+ value = 0
+ while count < 4: # Get 4 more characters
+ ch = str[j:j+1].lower()
+ # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
+ j = j + 1
+ if ch == "":
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal(3)")
+ k = "0123456789abcdef".find(ch)
+ if k < 0:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "bad string literal hex escape")
+ value = value * 16 + k
+ count = count + 1
+ uch = unichr(value)
+ return j, uch
+
+ def UEscape(self, str, i, startline):
+ """Read an 8-hex-digit \\U escape; return (new position, character).
+ Decodes via the unicode-escape codec so it works on narrow builds."""
+ stringType = type('')
+ j = i
+ count = 0
+ value = '\\U'
+ while count < 8: # Get 8 more characters
+ ch = str[j:j+1].lower()
+ # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
+ j = j + 1
+ if ch == "":
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal(3)")
+ k = "0123456789abcdef".find(ch)
+ if k < 0:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "bad string literal hex escape")
+ value = value + ch
+ count = count + 1
+
+ uch = stringType(value).decode('unicode-escape')
+ return j, uch
+
+# Detect whether this Python build supports code points above U+FFFF
+# (a "wide" unicode build): unichr raises ValueError on narrow builds.
+wide_build = True
+try:
+ unichr(0x10000)
+except ValueError:
+ wide_build = False
+
+# If we are going to do operators then they should generate
+# [ is operator:plus of ( \1 \2 ) ]
+
+
+class BadSyntax(SyntaxError):
+ """Parse error carrying document URI, line count, buffer and position,
+ rendered with a 60-character window around the offending point."""
+ def __init__(self, uri, lines, str, i, why):
+ self._str = str.encode('utf-8') # Better go back to strings for errors
+ self._i = i
+ self._why = why
+ self.lines = lines
+ self._uri = uri
+
+ def __str__(self):
+ str = self._str
+ i = self._i
+ st = 0
+ if i>60:
+ pre="..."
+ st = i - 60
+ else: pre=""
+ if len(str)-i > 60: post="..."
+ else: post=""
+
+ return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
+ % (self.lines +1, self._uri, self._why, pre,
+ str[st:i], str[i:i+60], post)
+
+
+
def stripCR(str):
    """Return a copy of *str* with every carriage-return character removed."""
    return "".join(ch for ch in str if ch != "\r")
+
+def dummyWrite(x):
+ """No-op sink used where a write callback is required but unwanted."""
+ pass
+
+################################################################################
+
+
def toBool(s):
    """Map the N3/XSD boolean spellings to a Python bool.

    Accepts 'true'/'True'/'1' and 'false'/'False'/'0'; anything else
    raises ValueError.
    """
    if s in ('true', 'True', '1'):
        return True
    if s in ('false', 'False', '0'):
        return False
    raise ValueError(s)
+
+
+
+
+
+class Formula(object):
+ """A nested N3 formula, backed by an rdflib QuotedGraph.
+ Each instance gets a globally increasing number for a stable bnode id."""
+ number = 0
+
+ def __init__(self, parent):
+ self.counter = 0
+ Formula.number += 1
+ self.number = Formula.number
+ self.existentials = {}
+ self.universals = {}
+
+ self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id())
+
+ def __str__(self):
+ return '_:Formula%s' % self.number
+
+ def id(self):
+ """BNode identifying this formula's quoted graph."""
+ return BNode('_:Formula%s' % self.number)
+
+ def newBlankNode(self, uri=None, why=None):
+ """Mint a blank node, either fresh or derived from a given uri fragment."""
+ if uri is None:
+ self.counter += 1
+ b = BNode('f%sb%s' % (id(self), self.counter))
+ else: b = BNode(uri.split('#').pop().replace('_', 'b'))
+ return b
+
+ def newUniversal(self, uri, why=None):
+ """Universal variables become rdflib Variables named by the fragment."""
+ return Variable(uri.split('#').pop())
+
+ def declareExistential(self, x):
+ self.existentials[x] = self.newBlankNode()
+
+ def close(self):
+ """Finish the formula and return the underlying quoted graph."""
+
+ return self.quotedgraph
+
+# Matches single high (non-ASCII) bytes; kept for the commented-out
+# percent-encoding alternative below.
+r_hibyte = re.compile(r'([\x80-\xff])')
+def iri(uri):
+ """Decode a UTF-8 byte string into a unicode IRI."""
+ return uri.decode('utf-8')
+ # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri))
+
+class RDFSink(object):
+ """Adapter receiving SinkParser events and writing rdflib triples
+ into a graph (root formula) or into nested QuotedGraphs."""
+ def __init__(self, graph):
+ self.rootFormula = None
+ self.counter = 0
+ self.graph=graph
+
+
+ def newFormula(self):
+ assert self.graph.store.formula_aware
+ f = Formula(self.graph)
+ return f
+
+ def newSymbol(self, *args):
+ uri = args[0].encode('utf-8')
+ return URIRef(iri(uri))
+
+ def newBlankNode(self, arg=None, **kargs):
+ """Blank node scoped to a Formula if given one, else sink-global."""
+ if isinstance(arg, Formula):
+ return arg.newBlankNode()
+ elif arg is None:
+ self.counter += 1
+ b = BNode('n' + str(self.counter))
+ else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
+ return b
+
+ def newLiteral(self, s, dt, lang):
+ if dt: return Literal(s, datatype=dt)
+ else: return Literal(s, lang=lang)
+
+ def newList(self, n, f):
+ """Recursively build an rdf:List from python list n in formula f."""
+ if not n:
+ return self.newSymbol(
+ 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
+ )
+
+ a = self.newBlankNode(f)
+ first = self.newSymbol(
+ 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
+ )
+ rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
+ self.makeStatement((f, first, a, n[0]))
+ self.makeStatement((f, rest, a, self.newList(n[1:], f)))
+ return a
+
+ def newSet(self, *args):
+ return set(args)
+
+ def setDefaultNamespace(self, *args):
+ return ':'.join(repr(n) for n in args)
+
+ def makeStatement(self, quadruple, why=None):
+ """Add one (formula, predicate, subject, object) quad to the right graph."""
+ f, p, s, o = quadruple
+
+ if hasattr(p, 'formula'):
+ raise Exception("Formula used as predicate")
+
+ s = self.normalise(f, s)
+ p = self.normalise(f, p)
+ o = self.normalise(f, o)
+
+
+ if f == self.rootFormula:
+ # print s, p, o, '.'
+ self.graph.add((s, p, o))
+ else:
+ f.quotedgraph.add((s,p,o))
+
+
+ #return str(quadruple)
+
+ def normalise(self, f, n):
+ """Convert parser-level values (tuples, bools, numbers, Decimals,
+ declared existentials) into rdflib terms."""
+ if isinstance(n, tuple):
+ return URIRef(unicode(n[1]))
+
+ # if isinstance(n, list):
+ # rdflist, f = n
+ # name = self.newBlankNode()
+ # if f == self.rootFormula:
+ # sublist = name
+ # for i in xrange(0, len(rdflist) - 1):
+ # print sublist, 'first', rdflist[i]
+ # rest = self.newBlankNode()
+ # print sublist, 'rest', rest
+ # sublist = rest
+ # print sublist, 'first', rdflist[-1]
+ # print sublist, 'rest', 'nil'
+ # return name
+
+ if isinstance(n, bool):
+ s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
+ return s
+
+ if isinstance(n, int) or isinstance(n, long):
+ s = Literal(unicode(n), datatype=INTEGER_DATATYPE)
+ return s
+
+ if isinstance(n, Decimal):
+ value = str(n.normalize())
+ if value == '-0':
+ value = '0'
+ s = Literal(value, datatype=DECIMAL_DATATYPE )
+ return s
+
+ if isinstance(n, float):
+ s = Literal(str(n), datatype=DOUBLE_DATATYPE )
+ return s
+
+ if f.existentials.has_key(n):
+ return f.existentials[n]
+
+ # if isinstance(n, Var):
+ # if f.universals.has_key(n):
+ # return f.universals[n]
+ # f.universals[n] = f.newBlankNode()
+ # return f.universals[n]
+
+ return n
+
+ def intern(self, something):
+ return something
+
+ def bind(self, pfx, uri):
+ pass # print pfx, ':', uri
+
+ def startDoc(self, formula):
+ self.rootFormula = formula
+
+ def endDoc(self, formula):
+ pass
+
+
+###################################################
+#
+# Utilities
+#
+
+# Map from N3 backslash-escape letters to the characters they denote.
+Escapes = {'a': '\a',
+ 'b': '\b',
+ 'f': '\f',
+ 'r': '\r',
+ 't': '\t',
+ 'v': '\v',
+ 'n': '\n',
+ '\\': '\\',
+ '"': '"'}
+
+# Characters that must be escaped in N3 string output; forbidden1 is for
+# long (triple-quoted) strings, which may contain raw tabs and newlines.
+forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]')
+forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]')
+#"
+#"
+def stringToN3(str, singleLine=0, flags=""):
+ """Serialize a python string as an N3 string literal, choosing long
+ (triple-quoted) form for multi-line strings and escaping as needed.
+ Flag 'e' forces \\uXXXX escapes for non-ASCII characters."""
+ res = ''
+ if (len(str) > 20 and
+ str[-1] <> '"' and
+ not singleLine and
+ (str.find("\n") >=0
+ or str.find('"') >=0)):
+ delim= '"""'
+ forbidden = forbidden1 # (allow tabs too now)
+ else:
+ delim = '"'
+ forbidden = forbidden2
+
+ i = 0
+
+ while i < len(str):
+ m = forbidden.search(str, i)
+ if not m:
+ break
+
+ j = m.start()
+ res = res + str[i:j]
+ ch = m.group(0)
+ if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #"
+ res = res + ch
+ else:
+ k = '\a\b\f\r\t\v\n\\"'.find(ch)
+ if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k]
+ else:
+ if 'e' in flags:
+# res = res + ('\\u%04x' % ord(ch))
+ res = res + ('\\u%04X' % ord(ch))
+ # http://www.w3.org/TR/rdf-testcases/#ntriples
+ else:
+ res = res + ch
+ i = j + 1
+
+ # The following code fixes things for really high range Unicode
+ newstr = ""
+ for ch in res + str[i:]:
+ if ord(ch)>65535:
+ newstr = newstr + ('\\U%08X' % ord(ch))
+ # http://www.w3.org/TR/rdf-testcases/#ntriples
+ else:
+ newstr = newstr + ch
+ #
+
+ return delim + newstr + delim
+
+def backslashUify(ustr):
+ """Use URL encoding to return an ASCII string corresponding
+ to the given unicode"""
+# progress("String is "+`ustr`)
+# s1=ustr.encode('utf-8')
+ s = ""
+ for ch in ustr: # .encode('utf-8'):
+ if ord(ch) > 65535:
+ ch = "\\U%08X" % ord(ch)
+ elif ord(ch) > 126:
+ ch = "\\u%04X" % ord(ch)
+ else:
+ ch = "%c" % ord(ch)
+ s = s + ch
+ return b(s)
+
+@py3compat.format_doctest_out
+def hexify(ustr):
+ """Use URL encoding to return an ASCII string
+ corresponding to the given UTF8 string
+
+ >>> hexify("http://example/a b")
+ %(b)s'http://example/a%%20b'
+
+ """ #"
+# progress("String is "+`ustr`)
+# s1=ustr.encode('utf-8')
+ s = ""
+ for ch in ustr: # .encode('utf-8'):
+ # Percent-encode controls, space and non-ASCII; pass printable ASCII.
+ if ord(ch) > 126 or ord(ch) < 33 :
+ ch = "%%%02X" % ord(ch)
+ else:
+ ch = "%c" % ord(ch)
+ s = s + ch
+ return b(s)
+
+def dummy():
+ """Historical N3-string-quoting sketch kept for reference.
+ NOTE(review): takes no parameters yet reads 'str' — it would resolve
+ to the builtin and fail; this function appears to be dead code."""
+ res = ""
+ if len(str) > 20 and (str.find("\n") >=0
+ or str.find('"') >=0):
+ delim= '"""'
+ forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now)
+ else:
+ delim = '"'
+ forbidden = "\\\"\a\b\f\r\v\t\n"
+ for i in range(len(str)):
+ ch = str[i]
+ j = forbidden.find(ch)
+ if ch == '"' and delim == '"""' \
+ and i+1 < len(str) and str[i+1] != '"':
+ j=-1 # Single quotes don't need escaping in long format
+ if j>=0: ch = "\\" + '\\"abfrvtn'[j]
+ elif ch not in "\n\t" and (ch < " " or ch > "}"):
+ ch = "[[" + `ch` + "]]" #[2:-1] # Use python
+ res = res + ch
+ return delim + res + delim
+
+
+class N3Parser(Parser):
+ """rdflib Parser plugin entry point: wires an input source, a
+ ConjunctiveGraph and an RDFSink into a SinkParser run."""
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, graph, encoding="utf-8"):
+ # we're currently being handed a Graph, not a ConjunctiveGraph
+ assert graph.store.context_aware # is this implied by formula_aware
+ assert graph.store.formula_aware
+
+ if encoding not in [None, "utf-8"]:
+ raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)
+
+ conj_graph = ConjunctiveGraph(store=graph.store)
+ conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg
+ # TODO: update N3Processor so that it can use conj_graph as the sink
+ conj_graph.namespace_manager = graph.namespace_manager
+ sink = RDFSink(conj_graph)
+
+ baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
+ p = SinkParser(sink, baseURI=baseURI)
+
+ p.loadStream(source.getByteStream())
+
+ # Copy prefix bindings discovered during parsing onto the graph.
+ for prefix, namespace in p._bindings.items():
+ conj_graph.bind(prefix, namespace)
+
+
+
+
+def _test():
+ """Run this module's doctests (see hexify)."""
+ import doctest
+ doctest.testmod()
+
+
+# if __name__ == '__main__':
+# _test()
+
+def main():
+ """Command-line driver: parse the N3 file named in sys.argv[1] and
+ print every resulting quad (Python 2 print statement)."""
+ g=ConjunctiveGraph()
+
+ sink = RDFSink(g)
+ base = 'file://' + os.path.join(os.getcwd(), sys.argv[1])
+
+ p = SinkParser(sink, baseURI=base)
+ p._bindings[''] = p._baseURI + '#'
+ p.startDoc()
+
+ f = open(sys.argv[1], 'rb')
+ bytes = f.read()
+ f.close()
+
+ p.feed(bytes)
+ p.endDoc()
+ for t in g.quads((None,None,None)):
+
+ print t
+
+if __name__ == '__main__':
+ main()
+
+#ends
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py~
new file mode 100644
index 0000000..ac48340
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py~
@@ -0,0 +1,2314 @@
+#!/usr/bin/env python
+u"""
+notation3.py - Standalone Notation3 Parser
+Derived from CWM, the Closed World Machine
+
+Authors of the original suite:
+
+* Dan Connolly <@@>
+* Tim Berners-Lee <@@>
+* Yosi Scharf <@@>
+* Joseph M. Reagle Jr. <reagle@w3.org>
+* Rich Salz <rsalz@zolera.com>
+
+http://www.w3.org/2000/10/swap/notation3.py
+
+Copyright 2000-2007, World Wide Web Consortium.
+Copyright 2001, MIT.
+Copyright 2001, Zolera Systems Inc.
+
+License: W3C Software License
+http://www.w3.org/Consortium/Legal/copyright-software
+
+Modified by Sean B. Palmer
+Copyright 2007, Sean B. Palmer. \u32E1
+
+Modified to work with rdflib by Gunnar Aastrand Grimnes
+Copyright 2010, Gunnar A. Grimnes
+
+"""
+
+# Python standard libraries
+import types
+import sys
+import os
+import string
+import re
+import time
+import StringIO
+import codecs
+
+from binascii import a2b_hex
+from decimal import Decimal
+
+from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
+from rdflib.graph import QuotedGraph, ConjunctiveGraph
+from rdflib import py3compat
+b = py3compat.b
+
+__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"]
+
+from rdflib.parser import Parser
+
+# Incestuous.. would be nice to separate N3 and XML
+# from sax2rdf import XMLtoDOM
def XMLtoDOM(*args, **kargs):
    """Disabled stub (was sax2rdf.XMLtoDOM); XML literals are ignored here."""
    # print >> sys.stderr, args, kargs
    return None
+
+# SWAP http://www.w3.org/2000/10/swap
+# from diag import verbosity, setVerbosity, progress
def verbosity(*args, **kargs):
    """Stub for cwm's diag.verbosity — diagnostics are disabled in rdflib."""
    return None

def setVerbosity(*args, **kargs):
    """Stub for cwm's diag.setVerbosity — accepted and ignored."""
    return None

def progress(*args, **kargs):
    """Stub for cwm's diag.progress — accepted and ignored."""
    return None
+
+
+
def splitFrag(uriref):
    """split a URI reference between the fragment and the rest.

    Punctuation is thrown away.

    e.g.

    >>> splitFrag("abc#def")
    ('abc', 'def')

    >>> splitFrag("abcdef")
    ('abcdef', None)

    """
    head, sep, tail = uriref.rpartition("#")
    if sep:
        return head, tail
    return uriref, None
+
def splitFragP(uriref, punct=0):
    """split a URI reference before the fragment

    Punctuation is kept.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    head, sep, tail = uriref.rpartition("#")
    if sep:
        return head, sep + tail
    return uriref, ''
+
@py3compat.format_doctest_out
def join(here, there):
    """join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    here is assumed to be absolute.
    there is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo')
    Traceback (most recent call last):
        raise ValueError, here
    ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(u'Andr\\xe9')
    5

    >>> join('http://example.org/', u'#Andr\\xe9')
    %(u)s'http://example.org/#Andr\\xe9'
    """

    assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?)

    slashl = there.find('/')
    colonl = there.find(':')

    # join(base, 'foo:/') -- absolute
    if colonl >= 0 and (slashl < 0 or colonl < slashl):
        return there

    bcolonl = here.find(':')
    assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute

    path, frag = splitFragP(there)
    if not path: return here + frag

    # join('mid:foo@example', '../foo') bzzt
    # NOTE: '<>' is Python-2-only inequality (this module is Python 2).
    if here[bcolonl+1:bcolonl+2] <> '/':
        raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." %(here, there))

    # bpath = index of the first slash of the base's path component.
    if here[bcolonl+1:bcolonl+3] == '//':
        bpath = here.find('/', bcolonl+3)
    else:
        bpath = bcolonl+1

    # join('http://xyz', 'foo')
    if bpath < 0:
        bpath = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:bcolonl+1] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:bpath] + there

    slashr = here.rfind('/')

    # Strip leading './' segments; for each '../' (or trailing '..')
    # walk the base path up one directory.
    while 1:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            i = here.rfind('/', bpath, slashr)
            if i >= 0:
                here = here[:i+1]
                slashr = i
        else:
            break

    return here[:slashr+1] + path + frag
+
+commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')
+
def refTo(base, uri):
    """figure out a relative URI reference from base to uri

    >>> refTo('http://example/x/y/z', 'http://example/x/abc')
    '../abc'

    >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
    'q/r#s'

    >>> refTo(None, 'http://ex/x/y')
    'http://ex/x/y'

    >>> refTo('http://ex/x/y', 'http://ex/x/y')
    ''

    Note the relationship between refTo and join:
    join(x, refTo(x, y)) == y
    which points out certain strings which cannot be URIs. e.g.
    >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
    0

    So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
    >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
    1

    This one checks that it uses a root-relative one where that is
    all they share. Now uses root-relative where no path is shared.
    This is a matter of taste but tends to give more resilience IMHO
    -- and shorter paths

    Note that base may be None, meaning no base. In some situations, there
    just ain't a base. Slife. In these cases, relTo returns the absolute value.
    The axiom abs(,rel(b,x))=x still holds.
    This saves people having to set the base to "bogus:".

    >>> refTo('http://ex/x/y/z', 'http://ex/r')
    '/r'

    """

#    assert base # don't mask bugs -danc # not a bug. -tim
    if not base: return uri
    if base == uri: return ""

    # Find how many path segments in common
    i=0
    while i<len(uri) and i<len(base):
        if uri[i] == base[i]: i = i + 1
        else: break
    # print "# relative", base, uri, " same up to ", i
    # i point to end of shortest one or first difference

    # If the shared prefix is exactly scheme+host, emit a root-relative ref.
    m = commonHost.match(base[:i])
    if m:
        k=uri.find("//")
        if k<0: k=-2 # no host
        l=uri.find("/", k+2)
        if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]:
            return uri[l:]

    if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base

    # Back up to the last shared '/' so we only diverge on whole segments.
    while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash

    if i < 3: return uri # No way.
    if base.find("//", i-2)>0 \
       or uri.find("//", i-2)>0: return uri # An unshared "//"
    if base.find(":", i)>0: return uri # An unshared ":"
    # One '../' per remaining segment of the base path.
    n = base.count("/", i)
    if n == 0 and i<len(uri) and uri[i] == '#':
        return "./" + uri[i:]
    elif n == 0 and i == len(uri):
        return "./"
    else:
        return ("../" * n) + uri[i:]
+
+
def base():
    """The base URI for this process - the Web equiv of cwd

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity

    """
#    return "file://" + hostname + os.getcwd() + "/"
    return "file://" + _fixslash(os.getcwd()) + "/"
+
+
+def _fixslash(str):
+ """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
+ s = str
+ for i in range(len(s)):
+ if s[i] == "\\": s = s[:i] + "/" + s[i+1:]
+ if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present
+ return s
+
+URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
+ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
@py3compat.format_doctest_out
def canonical(str_in):
    """Convert equivalent URIs (or parts) to the same string

    There are many different levels of URI canonicalization
    which are possible. See http://www.ietf.org/rfc/rfc3986.txt
    Done:
    - Converting unicode IRI to utf-8
    - Escaping all non-ASCII
    - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
      hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
    - Making all escapes uppercase hexadecimal

    Not done:
    - Making URI scheme lowercase
    - changing /./ or /foo/../ to / with care not to change host part


    >>> canonical("foo bar")
    %(b)s'foo%%20bar'

    >>> canonical(u'http:')
    %(b)s'http:'

    >>> canonical('fran%%c3%%83%%c2%%a7ois')
    %(b)s'fran%%C3%%83%%C2%%A7ois'

    >>> canonical('a')
    %(b)s'a'

    >>> canonical('%%4e')
    %(b)s'N'

    >>> canonical('%%9d')
    %(b)s'%%9D'

    >>> canonical('%%2f')
    %(b)s'%%2F'

    >>> canonical('%%2F')
    %(b)s'%%2F'

    """
    # Work on the UTF-8 byte representation throughout.
    if type(str_in) == type(u''):
        s8 = str_in.encode('utf-8')
    else:
        s8 = str_in
    s = b('')
    i = 0
    while i < len(s8):
        # Indexing bytes yields an int on Py3 but a 1-char str on Py2.
        if py3compat.PY3:
            n = s8[i]; ch = bytes([n])
        else:
            ch = s8[i]; n = ord(ch)
        if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
            s += b("%%%02X" % ord(ch))
        elif ch == b('%') and i+2 < len(s8):
            # Existing escape: de-escape unreserved chars, uppercase the rest.
            ch2 = a2b_hex(s8[i+1:i+3])
            if ch2 in URI_unreserved: s += ch2
            else: s += b("%%%02X" % ord(ch2))
            i = i+3
            continue
        else:
            s += ch
        i = i +1
    return s
+
+
+
+
+
+
+CONTEXT = 0
+PRED = 1
+SUBJ = 2
+OBJ = 3
+
+PARTS = PRED, SUBJ, OBJ
+ALL4 = CONTEXT, PRED, SUBJ, OBJ
+
+SYMBOL = 0
+FORMULA = 1
+LITERAL = 2
+LITERAL_DT = 21
+LITERAL_LANG = 22
+ANONYMOUS = 3
+XMLLITERAL = 25
+
+Logic_NS = "http://www.w3.org/2000/10/swap/log#"
+NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging
+forSomeSym = Logic_NS + "forSome"
+forAllSym = Logic_NS + "forAll"
+
+RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+OWL_NS = "http://www.w3.org/2002/07/owl#"
+DAML_sameAs_URI = OWL_NS+"sameAs"
+parsesTo_URI = Logic_NS + "parsesTo"
+RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"
+
+List_NS = RDF_NS_URI # From 20030808
+_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"
+
+N3_first = (SYMBOL, List_NS + "first")
+N3_rest = (SYMBOL, List_NS + "rest")
+N3_li = (SYMBOL, List_NS + "li")
+N3_nil = (SYMBOL, List_NS + "nil")
+N3_List = (SYMBOL, List_NS + "List")
+N3_Empty = (SYMBOL, List_NS + "Empty")
+
+
+
# Lazily-created namespace URI shared by all run-local objects in this process.
runNamespaceValue = None

def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    # 'is None' rather than '== None': identity test is the correct idiom
    # and avoids invoking __eq__ on arbitrary objects.
    if runNamespaceValue is None:
        runNamespaceValue = join(base(), _unique_id()) + '#'
    return runNamespaceValue
+
# Counter backing uniqueURI(); incremented once per generated URI.
nextu = 0

def uniqueURI():
    "A unique URI"
    global nextu
    nextu += 1
    # %d formatting of the int is identical to the old backtick-repr form.
    return "%su_%d" % (runNamespace(), nextu)
+
class URISyntaxError(ValueError):
    """Raised when something passed as a URI reference is malformed."""
    pass
+
+
+tracking = False
+chatty_flag = 50
+
+
+from xml.dom import Node
+try:
+ from xml.ns import XMLNS
+except:
+ class XMLNS:
+ BASE = "http://www.w3.org/2000/xmlns/"
+ XML = "http://www.w3.org/XML/1998/namespace"
+
+
# Small helpers over DOM nodes used throughout the C14N implementation below.
_attrs = lambda E: (E.attributes and E.attributes.values()) or []  # attribute nodes, or []
_children = lambda E: E.childNodes or []                           # child nodes, or []
_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML                 # attr in the xml: namespace?
_inclusive = lambda n: n.unsuppressedPrefixes == None              # inclusive (non-exclusive) C14N?

# Does a document/PI has lesser/greater document order than the
# first element?
_LesserElement, _Element, _GreaterElement = range(3)
+
def _sorter(n1,n2):
    '''_sorter(n1,n2) -> int
    Sorting predicate for non-NS attributes: order by namespaceURI,
    breaking ties on localName.  (Python 2 cmp-style comparator.)'''

    # `or` short-circuits exactly like the original "if i: return i" form,
    # since a non-zero cmp result is truthy.
    return cmp(n1.namespaceURI, n2.namespaceURI) or cmp(n1.localName, n2.localName)
+
+
def _sorter_ns(n1,n2):
    '''_sorter_ns((n,v),(n,v)) -> int
    "(an empty namespace URI is lexicographically least)."
    The default 'xmlns' declaration always sorts first.'''

    name1, name2 = n1[0], n2[0]
    if name1 == 'xmlns':
        return -1
    if name2 == 'xmlns':
        return 1
    return cmp(name1, name2)
+
+def _utilized(n, node, other_attrs, unsuppressedPrefixes):
+ '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
+ Return true if that nodespace is utilized within the node'''
+
+ if n.startswith('xmlns:'):
+ n = n[6:]
+ elif n.startswith('xmlns'):
+ n = n[5:]
+ if (n=="" and node.prefix in ["#default", None]) or \
+ n == node.prefix or n in unsuppressedPrefixes:
+ return 1
+ for attr in other_attrs:
+ if n == attr.prefix: return 1
+ return 0
+
#_in_subset = lambda subset, node: not subset or node in subset
# None means "no subset restriction" (include everything); an explicit empty
# subset excludes everything — hence `is None` rather than truthiness.
_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak
+
+class _implementation:
+ '''Implementation class for C14N. This accompanies a node during it's
+ processing and includes the parameters and processing state.'''
+
+ # Handler for each node type; populated during module instantiation.
+ handlers = {}
+
+ def __init__(self, node, write, **kw):
+ '''Create and run the implementation.'''
+ self.write = write
+ self.subset = kw.get('subset')
+ self.comments = kw.get('comments', 0)
+ self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
+ nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
+
+ # Processing state.
+ self.state = (nsdict, {'xml':''}, {}) #0422
+
+ if node.nodeType == Node.DOCUMENT_NODE:
+ self._do_document(node)
+ elif node.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ if not _inclusive(self):
+ self._do_element(node)
+ else:
+ inherited = self._inherit_context(node)
+ self._do_element(node, inherited)
+ elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ elif node.nodeType == Node.TEXT_NODE:
+ self._do_text(node)
+ else:
+ raise TypeError, str(node)
+
+
+ def _inherit_context(self, node):
+ '''_inherit_context(self, node) -> list
+ Scan ancestors of attribute and namespace context. Used only
+ for single element node canonicalization, not for subset
+ canonicalization.'''
+
+ # Collect the initial list of xml:foo attributes.
+ xmlattrs = filter(_IN_XML_NS, _attrs(node))
+
+ # Walk up and get all xml:XXX attributes we inherit.
+ inherited, parent = [], node.parentNode
+ while parent and parent.nodeType == Node.ELEMENT_NODE:
+ for a in filter(_IN_XML_NS, _attrs(parent)):
+ n = a.localName
+ if n not in xmlattrs:
+ xmlattrs.append(n)
+ inherited.append(a)
+ parent = parent.parentNode
+ return inherited
+
+
+ def _do_document(self, node):
+ '''_do_document(self, node) -> None
+ Process a document node. documentOrder holds whether the document
+ element has been encountered such that PIs/comments can be written
+ as specified.'''
+
+ self.documentOrder = _LesserElement
+ for child in node.childNodes:
+ if child.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ self._do_element(child)
+ self.documentOrder = _GreaterElement # After document element
+ elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
+ self._do_pi(child)
+ elif child.nodeType == Node.COMMENT_NODE:
+ self._do_comment(child)
+ elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ else:
+ raise TypeError, str(child)
+ handlers[Node.DOCUMENT_NODE] = _do_document
+
+
+ def _do_text(self, node):
+ '''_do_text(self, node) -> None
+ Process a text or CDATA node. Render various special characters
+ as their C14N entity representations.'''
+ if not _in_subset(self.subset, node): return
+ s = node.data.replace("&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace(">", "&gt;")
+ s = s.replace("\015", "&#xD;")
+ if s: self.write(s)
+ handlers[Node.TEXT_NODE] = _do_text
+ handlers[Node.CDATA_SECTION_NODE] = _do_text
+
+
+ def _do_pi(self, node):
+ '''_do_pi(self, node) -> None
+ Process a PI node. Render a leading or trailing #xA if the
+ document order of the PI is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<?')
+ W(node.nodeName)
+ s = node.data
+ if s:
+ W(' ')
+ W(s)
+ W('?>')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
+
+
+ def _do_comment(self, node):
+ '''_do_comment(self, node) -> None
+ Process a comment node. Render a leading or trailing #xA if the
+ document order of the comment is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ if self.comments:
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<!--')
+ W(node.data)
+ W('-->')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.COMMENT_NODE] = _do_comment
+
+
+ def _do_attr(self, n, value):
+ ''''_do_attr(self, node) -> None
+ Process an attribute.'''
+
+ W = self.write
+ W(' ')
+ W(n)
+ W('="')
+ s = value.replace(value, "&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace('"', '&quot;')
+ s = s.replace('\011', '&#x9')
+ s = s.replace('\012', '&#xA')
+ s = s.replace('\015', '&#xD')
+ W(s)
+ W('"')
+
+
+ def _do_element(self, node, initial_other_attrs = []):
+ '''_do_element(self, node, initial_other_attrs = []) -> None
+ Process an element (and its children).'''
+
+ # Get state (from the stack) make local copies.
+ # ns_parent -- NS declarations in parent
+ # ns_rendered -- NS nodes rendered by ancestors
+ # ns_local -- NS declarations relevant to this element
+ # xml_attrs -- Attributes in XML namespace from parent
+ # xml_attrs_local -- Local attributes in XML namespace.
+ ns_parent, ns_rendered, xml_attrs = \
+ self.state[0], self.state[1].copy(), self.state[2].copy() #0422
+ ns_local = ns_parent.copy()
+ xml_attrs_local = {}
+
+ # progress("_do_element node.nodeName=", node.nodeName)
+ # progress("_do_element node.namespaceURI", node.namespaceURI)
+ # progress("_do_element node.tocml()", node.toxml())
+ # Divide attributes into NS, XML, and others.
+ other_attrs = initial_other_attrs[:]
+ in_subset = _in_subset(self.subset, node)
+ for a in _attrs(node):
+ # progress("\t_do_element a.nodeName=", a.nodeName)
+ if a.namespaceURI == XMLNS.BASE:
+ n = a.nodeName
+ if n == "xmlns:": n = "xmlns" # DOM bug workaround
+ ns_local[n] = a.nodeValue
+ elif a.namespaceURI == XMLNS.XML:
+ if _inclusive(self) or in_subset:
+ xml_attrs_local[a.nodeName] = a #0426
+ else:
+ other_attrs.append(a)
+ #add local xml:foo attributes to ancestor's xml:foo attributes
+ xml_attrs.update(xml_attrs_local)
+
+ # Render the node
+ W, name = self.write, None
+ if in_subset:
+ name = node.nodeName
+ W('<')
+ W(name)
+
+ # Create list of NS attributes to render.
+ ns_to_render = []
+ for n,v in ns_local.items():
+
+ # If default namespace is XMLNS.BASE or empty,
+ # and if an ancestor was the same
+ if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
+ and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
+ continue
+
+ # "omit namespace node with local name xml, which defines
+ # the xml prefix, if its string value is
+ # http://www.w3.org/XML/1998/namespace."
+ if n in ["xmlns:xml", "xml"] \
+ and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
+ continue
+
+
+ # If not previously rendered
+ # and it's inclusive or utilized
+ if (n,v) not in ns_rendered.items() \
+ and (_inclusive(self) or \
+ _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
+ ns_to_render.append((n, v))
+
+ # Sort and render the ns, marking what was rendered.
+ ns_to_render.sort(_sorter_ns)
+ for n,v in ns_to_render:
+ self._do_attr(n, v)
+ ns_rendered[n]=v #0417
+
+ # If exclusive or the parent is in the subset, add the local xml attributes
+ # Else, add all local and ancestor xml attributes
+ # Sort and render the attributes.
+ if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426
+ other_attrs.extend(xml_attrs_local.values())
+ else:
+ other_attrs.extend(xml_attrs.values())
+ other_attrs.sort(_sorter)
+ for a in other_attrs:
+ self._do_attr(a.nodeName, a.value)
+ W('>')
+
+ # Push state, recurse, pop state.
+ state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
+ for c in _children(node):
+ _implementation.handlers[c.nodeType](self, c)
+ self.state = state
+
+ if name: W('</%s>' % name)
+ handlers[Node.ELEMENT_NODE] = _do_element
+
+
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> UTF-8

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict -- a dictionary of prefix:uri namespace entries
                assumed to exist in the surrounding context
        comments -- keep comments if non-zero (default is 0)
        subset -- Canonical XML subsetting resulting from XPath (default is [])
        unsuppressedPrefixes -- do exclusive C14N, and this specifies the
                prefixes that should be inherited.
    '''
    # apply() is Python-2-only; equivalent to _implementation(node, w, **kw).
    # Canonicalization happens as a side effect of constructing _implementation.
    if output:
        apply(_implementation, (node, output.write), kw)
    else:
        s = StringIO.StringIO()
        apply(_implementation, (node, s.write), kw)
        return s.getvalue()
+
+# end of xmlC14n.py
+
+# from why import BecauseOfData, becauseSubexpression
def BecauseOfData(*args, **kargs):
    """Stub for cwm's provenance tracking; returns no reason object."""
    return None

def becauseSubexpression(*args, **kargs):
    """Stub for cwm's provenance tracking; returns no reason object."""
    return None
+
+N3_forSome_URI = forSomeSym
+N3_forAll_URI = forAllSym
+
+# Magic resources we know about
+
+
+
+ADDED_HASH = "#" # Stop where we use this in case we want to remove it!
+# This is the hash on namespace URIs
+
+RDF_type = ( SYMBOL , RDF_type_URI )
+DAML_sameAs = ( SYMBOL, DAML_sameAs_URI )
+
+LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"
+
+BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
+DECIMAL_DATATYPE = _XSD_PFX + "decimal"
+DOUBLE_DATATYPE = _XSD_PFX + "double"
+FLOAT_DATATYPE = _XSD_PFX + "float"
+INTEGER_DATATYPE = _XSD_PFX + "integer"
+
+option_noregen = 0 # If set, do not regenerate genids on output
+
+# @@ I18n - the notname chars need extending for well known unicode non-text
+# characters. The XML spec switched to assuming unknown things were name
+# characaters.
+# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
+_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/
+_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/
+_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+
+
+N3CommentCharacter = "#" # For unix script #! compatabilty
+
+########################################## Parse string to sink
+#
+# Regular expressions:
+eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment
+eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. w/comment
+ws = re.compile(r'[ \t]*') # Whitespace not including NL
+signed_integer = re.compile(r'[-+]?[0-9]+') # integer
+number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?')
+digitstring = re.compile(r'[0-9]+') # Unsigned integer
+interesting = re.compile(r'[\\\r\n\"]')
+langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')
+#"
+
+
+
+class SinkParser:
    def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
                 genPrefix = "", flags="",
                 why=None):
        """ note: namespace names should *not* end in #;
        the # will get added during qname processing

        store       -- the RDFSink that receives statements
        openFormula -- formula to parse into (a fresh one is created if None)
        thisDoc     -- absolute URI of the document being parsed ("" if none)
        baseURI     -- base for resolving relative URIs (defaults to thisDoc)
        genPrefix   -- URI prefix for generated bnode names
        flags       -- parser behaviour flags (e.g. "B")
        why         -- reason object for provenance tracking """

        self._bindings = {}       # prefix -> namespace URI from @prefix
        self._flags = flags
        if thisDoc != "":
            assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
            self._bindings[""] = thisDoc + "#" # default

        self._store = store
        if genPrefix: store.setGenPrefix(genPrefix) # pass it on

        self._thisDoc = thisDoc
        self.lines = 0              # for error handling
        self.startOfLine = 0        # For calculating character number
        self._genPrefix = genPrefix
        self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ]
        self.keywordsSet = 0    # Then only can others be considerd qnames
        self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term
        self._variables  = {}
        self._parentVariables = {}
        self._reason = why      # Why the parser was asked to parse this

        self._reason2 = None    # Why these triples
        # was: diag.tracking
        if tracking: self._reason2 = BecauseOfData(
                        store.newSymbol(thisDoc), because=self._reason)

        # Base URI falls back to the document URI, or None if neither given.
        if baseURI: self._baseURI = baseURI
        else:
            if thisDoc:
                self._baseURI = thisDoc
            else:
                self._baseURI = None

        assert not self._baseURI or ':' in self._baseURI

        if not self._genPrefix:
            if self._thisDoc: self._genPrefix = self._thisDoc + "#_g"
            else: self._genPrefix = uniqueURI()

        # Top-level formula: named after the document when we have one.
        if openFormula ==None:
            if self._thisDoc:
                self._formula = store.newFormula(thisDoc + "#_formula")
            else:
                self._formula = store.newFormula()
        else:
            self._formula = openFormula


        self._context = self._formula
        self._parentContext = None
+
+
+ def here(self, i):
+ """String generated from position in file
+
+ This is for repeatability when refering people to bnodes in a document.
+ This has diagnostic uses less formally, as it should point one to which
+ bnode the arbitrary identifier actually is. It gives the
+ line and character number of the '[' charcacter or path character
+ which introduced the blank node. The first blank node is boringly _L1C1.
+ It used to be used only for tracking, but for tests in general
+ it makes the canonical ordering of bnodes repeatable."""
+
+ return "%s_L%iC%i" % (self._genPrefix , self.lines,
+ i - self.startOfLine + 1)
+
    def formula(self):
        """Return the parser's top-level formula."""
        return self._formula
+
    def loadStream(self, stream):
        """Read the whole stream into memory and parse it; returns the top-level formula."""
        return self.loadBuf(stream.read()) # Not ideal
+
+ def loadBuf(self, buf):
+ """Parses a buffer and returns its top level formula"""
+ self.startDoc()
+
+ self.feed(buf)
+ return self.endDoc() # self._formula
+
+
    def feed(self, octets):
        """Feed an octet stream to the parser

        if BadSyntax is raised, the string
        passed in the exception object is the
        remainder after any statements have been parsed.
        So if there is more data to feed to the
        parser, it should be straightforward to recover."""

        # Python 2: decode bytes to unicode, stripping a leading BOM if present.
        if not isinstance(octets, unicode):
            s = octets.decode('utf-8')
            # NB already decoded, so \ufeff
            if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
                s = s[1:]
        else:
            s=octets

        # Repeatedly parse one directive or statement until EOF
        # (skipSpace returns negative at end of input).
        i = 0
        while i >= 0:
            j = self.skipSpace(s, i)
            if j<0: return

            i = self.directiveOrStatement(s,j)
            if i<0:
                print "# next char: ", `s[j]`
                raise BadSyntax(self._thisDoc, self.lines, s, j,
                                    "expected directive or statement")
+
+ def directiveOrStatement(self, str,h):
+
+ i = self.skipSpace(str, h)
+ if i<0: return i # EOF
+
+ j = self.directive(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ j = self.statement(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ return j
+
+
+ #@@I18N
+ global _notNameChars
+ #_namechars = string.lowercase + string.uppercase + string.digits + '_-'
+
    def tok(self, tok, str, i):
        """Check for keyword.  Space must have been stripped on entry and
        we must not be at end of file.

        Returns the index just past the keyword, or -1 if it does not match.
        A leading '@' always introduces a keyword; otherwise the bare word
        must be in self.keywords."""

        assert tok[0] not in _notNameChars # not for punctuation
        if str[i:i+1] == "@":
            i = i+1
        else:
            if tok not in self.keywords:
                return -1 # No, this has neither keywords declaration nor "@"

        # The keyword must be followed by a non-qname character.
        # NOTE(review): str[i+len(tok)] raises IndexError if the keyword ends
        # exactly at end of input — hence the "must not be at EOF" precondition.
        if (str[i:i+len(tok)] == tok
           and (str[i+len(tok)] in _notQNameChars )):
            i = i + len(tok)
            return i
        else:
            return -1
+
    def directive(self, str, i):
        """Parse one directive (@prefix, @base, @keywords, @forAll, @forSome)
        at position i.  Returns the index after the directive, or -1 if the
        text at i is not a directive."""
        j = self.skipSpace(str, i)
        if j<0: return j # eof
        res = []

        j = self.tok('bind', str, i) # implied "#". Obsolete.
        if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "keyword bind is obsolete: use @prefix")

        # @keywords word, word, ... — declares which bare words are keywords.
        j = self.tok('keywords', str, i)
        if j>0:
            i = self.commaSeparatedList(str, j, res, self.bareWord)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                    "'@keywords' needs comma separated list of words")
            self.setKeywords(res[:])
            # was: diag.chatty_flag
            if chatty_flag > 80: progress("Keywords ", self.keywords)
            return i


        # @forAll v1, v2, ... — declare universally quantified variables.
        j = self.tok('forAll', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "Bad variable list after @forAll")
            for x in res:
                #self._context.declareUniversal(x)
                if x not in self._variables or x in self._parentVariables:
                    self._variables[x] = self._context.newUniversal(x)
            return i

        # @forSome v1, v2, ... — declare existentially quantified variables.
        j = self.tok('forSome', str, i)
        if j > 0:
            i = self. commaSeparatedList(str, j, res, self.uri_ref2)
            if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "Bad variable list after @forSome")
            for x in res:
                self._context.declareExistential(x)
            return i


        # @prefix p: <uri> — bind a namespace prefix.
        j=self.tok('prefix', str, i) # no implied "#"
        if j>=0:
            t = []
            i = self.qname(str, j, t)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected qname after @prefix")
            j = self.uri_ref2(str, i, t)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "expected <uriref> after @prefix _qname_")
            ns = self.uriOf(t[1])

            # Relative namespace URIs are resolved against the base URI.
            if self._baseURI:
                ns = join(self._baseURI, ns)
            elif ":" not in ns:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                    "With no base URI, cannot use relative URI in @prefix <"+ns+">")
            assert ':' in ns # must be absolute
            self._bindings[t[0][0]] = ns
            self.bind(t[0][0], hexify(ns))
            return j

        # @base <uri> — change the base URI for subsequent relative refs.
        j=self.tok('base', str, i) # Added 2007/7/7
        if j >= 0:
            t = []
            i = self.uri_ref2(str, j, t)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected <uri> after @base ")
            ns = self.uriOf(t[0])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            else:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                    "With no previous base URI, cannot use relative URI in @base <"+ns+">")
            assert ':' in ns # must be absolute
            self._baseURI = ns
            return i

        return -1  # Not a directive, could be something else.
+
    def bind(self, qn, uri):
        """Bind prefix qn to the (byte-string) namespace uri on the store.

        An empty prefix sets the store's default namespace instead."""
        assert isinstance(uri,
                        types.StringType), "Any unicode must be %x-encoded already"
        if qn == "":
            self._store.setDefaultNamespace(uri)
        else:
            self._store.bind(qn, uri)
+
+ def setKeywords(self, k):
+ "Takes a list of strings"
+ if k == None:
+ self.keywordsSet = 0
+ else:
+ self.keywords = k
+ self.keywordsSet = 1
+
+
    def startDoc(self):
        """Tell the store that parsing of the document (top formula) is starting."""
        # was: self._store.startDoc()
        self._store.startDoc(self._formula)
+
    def endDoc(self):
        """Signal end of document and stop parsing; returns the top-level formula."""
        self._store.endDoc(self._formula) # don't canonicalize yet
        return self._formula
+
    def makeStatement(self, quadruple):
        """Emit one (context, predicate, subject, object) quad to the store."""
        #$$$$$$$$$$$$$$$$$$$$$
#        print "# Parser output: ", `quadruple`
        self._store.makeStatement(quadruple, why=self._reason2)
+
+
+
+ def statement(self, str, i):
+ r = []
+
+ i = self.object(str, i, r) # Allow literal for subject - extends RDF
+ if i<0: return i
+
+ j = self.property_list(str, i, r[0])
+
+ if j<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected propertylist")
+ return j
+
    def subject(self, str, i, res):
        """Parse a subject at i (same grammar as any item); append the term to res."""
        return self.item(str, i, res)
+
    def verb(self, str, i, res):
        """ has _prop_
        is _prop_ of
        a
        =
        _prop_
        >- prop ->
        <- prop -<
        _operator_

        Appends a single ('->'|'<-', predicate) pair to res; '<-' marks a
        reversed (object-to-subject) predicate.  Returns the index after
        the verb, or -1 if no verb is found."""

        j = self.skipSpace(str, i)
        if j<0:return j # eof

        r = []

        # 'has <prop>' — forward predicate.
        j = self.tok('has', str, i)
        if j>=0:
            i = self.prop(str, j, r)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines,
                                str, j, "expected property after 'has'")
            res.append(('->', r[0]))
            return i

        # 'is <prop> of' — reversed predicate.
        j = self.tok('is', str, i)
        if j>=0:
            i = self.prop(str, j, r)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected <property> after 'is'")
            j = self.skipSpace(str, i)
            if j<0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "End of file found, expected property after 'is'")
                return j # eof  (unreachable after raise)
            i=j
            j = self.tok('of', str, i)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "expected 'of' after 'is' <prop>")
            res.append(('<-', r[0]))
            return j

        # 'a' — rdf:type shorthand.
        j = self.tok('a', str, i)
        if j>=0:
            res.append(('->', RDF_type))
            return j


        # '<=' — reversed log:implies.
        if str[i:i+2] == "<=":
            res.append(('<-', self._store.newSymbol(Logic_NS+"implies")))
            return i+2

        # '=' — owl:sameAs; '=>' — log:implies.
        if str[i:i+1] == "=":
            if str[i+1:i+2] == ">":
                res.append(('->', self._store.newSymbol(Logic_NS+"implies")))
                return i+2
            res.append(('->', DAML_sameAs))
            return i+1

        if str[i:i+2] == ":=":
            # patch file relates two formulae, uses this @@ really?
            res.append(('->', Logic_NS+"becomes"))
            return i+2

        # Plain property expression.
        j = self.prop(str, i, r)
        if j >= 0:
            res.append(('->', r[0]))
            return j

        if str[i:i+2] == ">-" or str[i:i+2] == "<-":
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                                        ">- ... -> syntax is obsolete.")

        return -1
+
    def prop(self, str, i, res):
        # Properties use the same production as generic items/paths.
        return self.item(str, i, res)
+
    def item(self, str, i, res):
        # An item is a path expression (node plus optional !/^ traversals).
        return self.path(str, i, res)
+
    def blankNode(self, uri=None):
        """Return a new blank node in the current context.

        With the "B" flag set, mint a named symbol and declare it
        existential instead of creating an anonymous node.
        """
        if "B" not in self._flags:
            return self._context.newBlankNode(uri, why=self._reason2)
        x = self._context.newSymbol(uri)
        self._context.declareExistential(x)
        return x
+
    def path(self, str, i, res):
        """Parse the path production.

        A node optionally followed by '!', '^' or '.' traversal steps;
        each step mints a blank node and emits a statement linking it.
        """
        j = self.nodeOrLiteral(str, i, res)
        if j<0: return j  # nope

        while str[j:j+1] in "!^.":  # no spaces, must follow exactly (?)
            ch = str[j:j+1]  # @@ Allow "." followed IMMEDIATELY by a node.
            if ch == ".":
                ahead = str[j+1:j+2]
                # '.' only continues the path if immediately followed by a
                # name-start character or one of ":?<[{(".
                if not ahead or (ahead in _notNameChars
                                 and ahead not in ":?<[{("): break
            subj = res.pop()
            obj = self.blankNode(uri=self.here(j))
            j = self.node(str, j+1, res)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                    "EOF found in middle of path syntax")
            pred = res.pop()
            if ch == "^":  # Reverse traverse
                self.makeStatement((self._context, pred, obj, subj))
            else:
                self.makeStatement((self._context, pred, subj, obj))
            res.append(obj)
        return j
+
    def anonymousNode(self, ln):
        """Remember or generate a term for one of these _: anonymous nodes"""
        term = self._anonymousNodes.get(ln, None)
        if term != None: return term  # NOTE(review): `is not None` is the idiom
        term = self._store.newBlankNode(self._context, why=self._reason2)
        self._anonymousNodes[ln] = term
        return term
+
    def node(self, str, i, res, subjectAlready=None):
        """Parse the <node> production.
        Space is now skipped once at the beginning
        instead of in multipe calls to self.skipSpace().

        Handles [ propertyList ], { formula }, {$ set $}, ( list ),
        ($ set $), 'this', booleans, and named nodes via uri_ref2().
        Appends the parsed term to *res*; returns new offset or -1.
        """
        subj = subjectAlready

        j = self.skipSpace(str,i)
        if j<0: return j #eof
        i=j
        ch = str[i:i+1]  # Quick 1-character checks first:

        if ch == "[":
            bnodeID = self.here(i)
            j=self.skipSpace(str,i+1)
            if j<0: raise BadSyntax(self._thisDoc,
                                    self.lines, str, i, "EOF after '['")
            if str[j:j+1] == "=":  # Hack for "is" binding name to anon node
                i = j+1
                objs = []
                j = self.objectList(str, i, objs);
                if j>=0:
                    subj = objs[0]
                    if len(objs)>1:
                        # [ = a, b, ... ] equates all listed objects.
                        for obj in objs:
                            self.makeStatement((self._context,
                                                DAML_sameAs, subj, obj))
                    j = self.skipSpace(str, j)
                    if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                            "EOF when objectList expected after [ = ")
                    if str[j:j+1] == ";":
                        j=j+1
                else:
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "objectList expected after [= ")

            if subj is None:
                subj=self.blankNode(uri= bnodeID)

            i = self.property_list(str, j, subj)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                    "property_list expected")

            j = self.skipSpace(str, i)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "EOF when ']' expected after [ <propertyList>")
            if str[j:j+1] != "]":
                raise BadSyntax(self._thisDoc,
                                self.lines, str, j, "']' expected")
            res.append(subj)
            return j+1

        if ch == "{":
            # Either a set {$ ... $} or a nested (quoted) formula { ... }.
            ch2 = str[i+1:i+2]
            if ch2 == '$':
                i += 1
                j = i + 1
                List = []
                first_run = True
                while 1:
                    i = self.skipSpace(str, j)
                    if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                            "needed '$}', found end.")
                    if str[i:i+2] == '$}':
                        j = i+2
                        break

                    if not first_run:
                        if str[i:i+1] == ',':
                            i+=1
                        else:
                            raise BadSyntax(self._thisDoc, self.lines,
                                            str, i, "expected: ','")
                    else: first_run = False

                    item = []
                    j = self.item(str,i, item)  # @@@@@ should be path, was object
                    if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                            "expected item in set or '$}'")
                    List.append(self._store.intern(item[0]))
                res.append(self._store.newSet(List, self._context))
                return j
            else:
                # Nested formula: save the parser state (context, variables,
                # anonymous nodes, reason), parse statements into a fresh
                # formula, then restore the outer state.
                j=i+1
                oldParentContext = self._parentContext
                self._parentContext = self._context
                parentAnonymousNodes = self._anonymousNodes
                grandParentVariables = self._parentVariables
                self._parentVariables = self._variables
                self._anonymousNodes = {}
                self._variables = self._variables.copy()
                reason2 = self._reason2
                self._reason2 = becauseSubexpression
                if subj is None: subj = self._store.newFormula()
                self._context = subj

                while 1:
                    i = self.skipSpace(str, j)
                    if i<0: raise BadSyntax(self._thisDoc, self.lines,
                                            str, i, "needed '}', found end.")

                    if str[i:i+1] == "}":
                        j = i+1
                        break

                    j = self.directiveOrStatement(str,i)
                    if j<0: raise BadSyntax(self._thisDoc, self.lines,
                                            str, i, "expected statement or '}'")

                self._anonymousNodes = parentAnonymousNodes
                self._variables = self._parentVariables
                self._parentVariables = grandParentVariables
                self._context = self._parentContext
                self._reason2 = reason2
                self._parentContext = oldParentContext
                res.append(subj.close())  # No use until closed
                return j

        if ch == "(":
            thing_type = self._store.newList
            ch2 = str[i+1:i+2]
            if ch2 == '$':
                thing_type = self._store.newSet  # ($ ... $) is a set
                i += 1
            j=i+1

            List = []
            while 1:
                i = self.skipSpace(str, j)
                if i<0: raise BadSyntax(self._thisDoc, self.lines,
                                        str, i, "needed ')', found end.")
                if str[i:i+1] == ')':
                    j = i+1
                    break

                item = []
                j = self.item(str,i, item)  # @@@@@ should be path, was object
                if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                        "expected item in list or ')'")
                List.append(self._store.intern(item[0]))
            res.append(thing_type(List, self._context))
            return j

        j = self.tok('this', str, i)  # This context
        if j>=0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
            res.append(self._context)  # NOTE(review): unreachable after raise
            return j

        #booleans
        j = self.tok('true', str, i)
        if j>=0:
            res.append(True)
            return j
        j = self.tok('false', str, i)
        if j>=0:
            res.append(False)
            return j

        if subj is None:  # If this can be a named node, then check for a name.
            j = self.uri_ref2(str, i, res)
            if j >= 0:
                return j

        return -1
+
    def property_list(self, str, i, subj):
        """Parse property list
        Leaves the terminating punctuation in the buffer

        Repeatedly parses verb + objectList pairs separated by ';',
        emitting one statement per object; the arrow from verb()
        decides subject/object order.
        """
        while 1:
            j = self.skipSpace(str, i)
            if j<0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "EOF found when expected verb in property list")
                return j #eof  # NOTE(review): unreachable after raise

            if str[j:j+2] ==":-":
                i = j + 2
                res = []
                j = self.node(str, i, res, subj)
                if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                        "bad {} or () or [] node after :- ")
                i=j
                continue
            i=j
            v = []
            j = self.verb(str, i, v)
            if j<=0:
                return i  # void but valid

            objs = []
            i = self.objectList(str, j, objs)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                    "objectList expected")
            for obj in objs:
                dir, sym = v[0]  # dir is '->' or '<-' from verb()
                if dir == '->':
                    self.makeStatement((self._context, sym, subj, obj))
                else:
                    self.makeStatement((self._context, sym, obj, subj))

            j = self.skipSpace(str, i)
            if j<0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "EOF found in list of objects")
                return j #eof  # NOTE(review): unreachable after raise
            if str[i:i+1] != ";":
                return i
            i = i+1  # skip semicolon and continue
+
    def commaSeparatedList(self, str, j, res, what):
        """return value: -1 bad syntax; >=0 new position in str
        res has things found appended

        *what* is the bound method used to parse each element.
        """
        i = self.skipSpace(str, j)
        if i<0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "EOF found expecting comma sep list")
            return i  # NOTE(review): unreachable after raise
        if str[i] == ".": return j  # empty list is OK
        i = what(str, i, res)
        if i<0: return -1

        while 1:
            j = self.skipSpace(str, i)
            if j<0: return j # eof
            ch = str[j:j+1]
            if ch != ",":
                if ch != ".":
                    return -1
                return j  # Found but not swallowed "."
            i = what(str, j+1, res)
            if i<0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "bad list content")
                return i  # NOTE(review): unreachable after raise
+
    def objectList(self, str, i, res):
        """Parse a comma-separated list of objects, appending terms to *res*.

        Returns the offset of whatever follows the list, or a negative
        value on bad syntax/EOF.
        """
        i = self.object(str, i, res)
        if i<0: return -1
        while 1:
            j = self.skipSpace(str, i)
            if j<0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "EOF found after object")
                return j #eof  # NOTE(review): unreachable after raise
            if str[j:j+1] != ",":
                return j  # Found something else!
            i = self.object(str, j+1, res)
            if i<0: return i
+
    def checkDot(self, str, i):
        """Expect the statement terminator: consume '.', but leave '}'/']'."""
        j = self.skipSpace(str, i)
        if j<0: return j #eof
        if str[j:j+1] == ".":
            return j+1  # skip
        if str[j:j+1] == "}":
            return j  # don't skip it
        if str[j:j+1] == "]":
            return j
        raise BadSyntax(self._thisDoc, self.lines,
                        str, j, "expected '.' or '}' or ']' at end of statement")
        return i  # NOTE(review): unreachable after raise
+
+
    def uri_ref2(self, str, i, res):
        """Generate uri from n3 representation.

        Note that the RDF convention of directly concatenating
        NS and local name is now used though I prefer inserting a '#'
        to make the namesapces look more like what XML folks expect.

        Tries in order: qname, ?variable, <uriref>, then bare word
        (only when an @keywords directive is in effect).
        """
        qn = []
        j = self.qname(str, i, qn)
        if j>=0:
            pfx, ln = qn[0]
            if pfx is None:
                assert 0, "not used?"
                ns = self._baseURI + ADDED_HASH
            else:
                try:
                    ns = self._bindings[pfx]
                except KeyError:
                    if pfx == "_":  # Magic prefix 2001/05/30, can be overridden
                        res.append(self.anonymousNode(ln))
                        return j
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "Prefix \"%s:\" not bound" % (pfx))
            symb = self._store.newSymbol(ns + ln)
            if symb in self._variables:
                res.append(self._variables[symb])
            else:
                res.append(symb) # @@@ "#" CONVENTION
                # NOTE(review): `not ns.find("#")` is True only when '#' is at
                # index 0; the warning text suggests `ns.find("#") < 0` was meant.
                if not ns.find("#"):progress(
                    "Warning: no # on namespace %s," % ns)
            return j

        i = self.skipSpace(str, i)
        if i<0: return -1

        if str[i] == "?":
            v = []
            j = self.variable(str,i,v)
            if j>0:  #Forget varibles as a class, only in context.
                res.append(v[0])
                return j
            return -1

        elif str[i]=="<":
            i = i + 1
            st = i
            while i < len(str):
                if str[i] == ">":
                    uref = str[st:i]  # the join should dealt with "":
                    if self._baseURI:
                        uref = join(self._baseURI, uref)  # was: uripath.join
                    else:
                        assert ":" in uref, \
                            "With no base URI, cannot deal with relative URIs"
                    if str[i-1:i]=="#" and not uref[-1:]=="#":
                        uref = uref + "#"  # She meant it! Weirdness in urlparse?
                    symb = self._store.newSymbol(uref)
                    if symb in self._variables:
                        res.append(self._variables[symb])
                    else:
                        res.append(symb)
                    return i+1
                i = i + 1
            # NOTE(review): `j` below is the stale result of the earlier
            # qname() call, so the reported error position may be off.
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "unterminated URI reference")

        elif self.keywordsSet:
            v = []
            j = self.bareWord(str,i,v)
            if j<0: return -1  #Forget varibles as a class, only in context.
            if v[0] in self.keywords:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                'Keyword "%s" not allowed here.' % v[0])
            res.append(self._store.newSymbol(self._bindings[""]+v[0]))
            return j
        else:
            return -1
+
    def skipSpace(self, str, i):
        """Skip white space, newlines and comments.
        return -1 if EOF, else position of first non-ws character"""
        while 1:
            # eol is a module-level compiled pattern; each match consumes
            # one line ending and bumps the line counter.
            m = eol.match(str, i)
            if m == None: break
            self.lines = self.lines + 1
            i = m.end()  # Point to first character unmatched
            self.startOfLine = i
        m = ws.match(str, i)
        if m != None:
            i = m.end()
        m = eof.match(str, i)
        if m != None: return -1
        return i
+
    def variable(self, str, i, res):
        """ ?abc -> variable(:abc)

        Registers the variable as a universal in the current context (or
        the parent context when inside a nested formula) and appends it.
        """

        j = self.skipSpace(str, i)
        if j<0: return -1

        if str[j:j+1] != "?": return -1
        j=j+1
        i = j
        if str[j] in "0123456789-":
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "Varible name can't start with '%s'" % str[j])
            return -1  # NOTE(review): unreachable after raise
        while i <len(str) and str[i] not in _notNameChars:
            i = i+1
        if self._parentContext == None:
            varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
            if varURI not in self._variables:
                self._variables[varURI] = self._context.newUniversal(varURI
                                                , why=self._reason2)
            res.append(self._variables[varURI])
            return i
        # @@ was:
        # raise BadSyntax(self._thisDoc, self.lines, str, j,
        #     "Can't use ?xxx syntax for variable in outermost level: %s"
        #     % str[j-1:i])
        varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
        if varURI not in self._parentVariables:
            self._parentVariables[varURI] = self._parentContext.newUniversal(varURI
                                                , why=self._reason2)
        res.append(self._parentVariables[varURI])
        return i
+
    def bareWord(self, str, i, res):
        """ abc -> :abc

        Appends the bare word (no prefix resolution) to *res*;
        returns the new offset or -1 if none found.
        """
        j = self.skipSpace(str, i)
        if j<0: return -1

        if str[j] in "0123456789-" or str[j] in _notNameChars: return -1
        i = j
        while i <len(str) and str[i] not in _notNameChars:
            i = i+1
        res.append(str[j:i])
        return i
+
    def qname(self, str, i, res):
        """
        xyz:def -> ('xyz', 'def')
        If not in keywords and keywordsSet: def -> ('', 'def')
        :def -> ('', 'def')

        Appends a (prefix, localname) pair to *res*; returns new
        offset or -1.
        """

        i = self.skipSpace(str, i)
        if i<0: return -1

        c = str[i]
        if c in "0123456789-+": return -1
        if c not in _notNameChars:
            ln = c
            i = i + 1
            while i < len(str):
                c = str[i]
                if c not in _notNameChars:
                    ln = ln + c
                    i = i + 1
                else: break
        else:  # First character is non-alpha
            ln = ''  # Was: None - TBL (why? useful?)

        if i<len(str) and str[i] == ':':
            pfx = ln
            i = i + 1
            ln = ''
            while i < len(str):
                c = str[i]
                if c not in _notNameChars:
                    ln = ln + c
                    i = i + 1
                else: break

            res.append((pfx, ln))
            return i

        else:  # delimiter was not ":"
            # Bare word allowed only under an @keywords directive, and only
            # when the word is not itself a keyword.
            if ln and self.keywordsSet and ln not in self.keywords:
                res.append(('', ln))
                return i
            return -1
+
    def object(self, str, i, res):
        """Parse an object: any subject production, or a string literal."""
        j = self.subject(str, i, res)
        if j>= 0:
            return j
        else:
            j = self.skipSpace(str, i)
            if j<0: return -1
            else: i=j

            if str[i]=='"':
                if str[i:i+3] == '"""': delim = '"""'
                else: delim = '"'
                i = i + len(delim)

                j, s = self.strconst(str, i, delim)

                res.append(self._store.newLiteral(s))
                # NOTE(review): debug trace left enabled on every string literal.
                progress("New string const ", s, j)
                return j
            else:
                return -1
+
    def nodeOrLiteral(self, str, i, res):
        """Parse a node, or failing that a numeric or string literal
        (with optional @lang tag or ^^datatype)."""
        j = self.node(str, i, res)
        startline = self.lines  # Remember where for error messages
        if j>= 0:
            return j
        else:
            j = self.skipSpace(str, i)
            if j<0: return -1
            else: i=j

            ch = str[i]
            if ch in "-+0987654321":
                m = number_syntax.match(str, i)
                if m == None:
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "Bad number syntax")
                j = m.end()
                if m.group('exponent') != None:  # includes decimal exponent
                    res.append(float(str[i:j]))
#                    res.append(self._store.newLiteral(str[i:j],
#                        self._store.newSymbol(FLOAT_DATATYPE)))
                elif m.group('decimal') != None:
                    res.append(Decimal(str[i:j]))
                else:
                    res.append(long(str[i:j]))  # Python 2 `long`
#                    res.append(self._store.newLiteral(str[i:j],
#                        self._store.newSymbol(INTEGER_DATATYPE)))
                return j

            if str[i]=='"':
                if str[i:i+3] == '"""': delim = '"""'
                else: delim = '"'
                i = i + len(delim)

                dt = None
                j, s = self.strconst(str, i, delim)
                lang = None
                if str[j:j+1] == "@":  # Language?
                    m = langcode.match(str, j+1)
                    if m == None:
                        raise BadSyntax(self._thisDoc, startline, str, i,
                                        "Bad language code syntax on string literal, after @")
                    i = m.end()
                    lang = str[j+1:i]
                    j = i
                if str[j:j+2] == "^^":
                    res2 = []
                    j = self.uri_ref2(str, j+2, res2)  # Read datatype URI
                    dt = res2[0]
#                    if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
                    if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
                        # XML literals are wrapped and parsed into a DOM node.
                        try:
                            dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">'
                                           + s
                                           + '</rdf:envelope>').firstChild
                        except:
                            raise ValueError('s="%s"' % s)
                        res.append(self._store.newXMLLiteral(dom))
                        return j
                res.append(self._store.newLiteral(s, dt, lang))
                return j
            else:
                return -1
+
    def uriOf(self, sym):
        """Return the URI for a symbol; tuples come from the old --pipe
        representation where the URI is the second element."""
        if isinstance(sym, types.TupleType):  # Python 2: types.TupleType is tuple
            return sym[1] # old system for --pipe
        # return sym.uriref() # cwm api
        return sym
+
+
    def strconst(self, str, i, delim):
        """parse an N3 string constant delimited by delim.
        return index, val

        Handles both '"' and '\"\"\"' delimiters, backslash escapes
        (including \\uXXXX / \\UXXXXXXXX), strips carriage returns, and
        tracks line numbers across embedded newlines.
        """

        j = i
        ustr = u""  # Empty unicode string
        startline = self.lines  # Remember where for error messages
        while j<len(str):
            if str[j] == '"':
                if delim == '"':  # done when delim is "
                    i = j + 1
                    return i, ustr
                if delim == '"""':  # done when delim is """ and ...
                    if str[j:j+5] == '"""""':  # ... we have "" before
                        i = j + 5
                        ustr = ustr + '""'
                        return i, ustr
                    if str[j:j+4] == '""""':  # ... we have " before
                        i = j + 4
                        ustr = ustr + '"'
                        return i, ustr
                    if str[j:j+3] == '"""':  # ... current " is part of delim
                        i = j + 3
                        return i, ustr

                    # we are inside of the string and current char is "
                    j = j + 1
                    ustr = ustr + '"'
                    continue

            m = interesting.search(str, j)  # was str[j:].
            # Note for pos param to work, MUST be compiled ... re bug?
            assert m, "Quote expected in string at ^ in %s^%s" %(
                str[j-20:j], str[j:j+20])  # we at least have to find a quote

            i = m.start()
            try:
                ustr = ustr + str[j:i]
            except UnicodeError:
                err = ""
                for c in str[j:i]:
                    err = err + (" %02x" % ord(c))
                streason = sys.exc_info()[1].__str__()
                raise BadSyntax(self._thisDoc, startline, str, j,
                                "Unicode error appending characters %s to string, because\n\t%s"
                                % (err, streason))

#            print "@@@ i = ",i, " j=",j, "m.end=", m.end()

            ch = str[i]
            if ch == '"':
                j = i
                continue
            elif ch == "\r":  # Strip carriage returns
                j = i+1
                continue
            elif ch == "\n":
                if delim == '"':
                    raise BadSyntax(self._thisDoc, startline, str, i,
                                    "newline found in string literal")
                self.lines = self.lines + 1
                ustr = ustr + ch
                j = i + 1
                self.startOfLine = j

            elif ch == "\\":
                j = i + 1
                ch = str[j:j+1]  # Will be empty if string ends
                if not ch:
                    raise BadSyntax(self._thisDoc, startline, str, i,
                                    "unterminated string literal (2)")
                k = 'abfrtvn\\"'.find(ch)
                if k >= 0:
                    uch = '\a\b\f\r\t\v\n\\"'[k]
                    ustr = ustr + uch
                    j = j + 1
                elif ch == "u":
                    j, ch = self.uEscape(str, j+1, startline)
                    ustr = ustr + ch
                elif ch == "U":
                    j, ch = self.UEscape(str, j+1, startline)
                    ustr = ustr + ch
                else:
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "bad escape")

        raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "unterminated string literal")
+
+
    def uEscape(self, str, i, startline):
        """Decode a 4-hex-digit \\uXXXX escape starting at *i*.

        Returns (new index, decoded character).
        """
        j = i
        count = 0
        value = 0
        while count < 4:  # Get 4 more characters
            ch = str[j:j+1].lower()
            # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
            j = j + 1
            if ch == "":
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "unterminated string literal(3)")
            k = "0123456789abcdef".find(ch)
            if k < 0:
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "bad string literal hex escape")
            value = value * 16 + k
            count = count + 1
        uch = unichr(value)  # Python 2 builtin
        return j, uch
+
    def UEscape(self, str, i, startline):
        """Decode an 8-hex-digit \\UXXXXXXXX escape starting at *i*.

        Accumulates the textual escape and lets the 'unicode-escape'
        codec decode it — presumably so this also works on narrow
        Python 2 builds where unichr() cannot (see wide_build below);
        TODO confirm.
        Returns (new index, decoded character).
        """
        stringType = type('')
        j = i
        count = 0
        value = '\\U'
        while count < 8:  # Get 8 more characters
            ch = str[j:j+1].lower()
            # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
            j = j + 1
            if ch == "":
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "unterminated string literal(3)")
            k = "0123456789abcdef".find(ch)
            if k < 0:
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "bad string literal hex escape")
            value = value + ch
            count = count + 1

        uch = stringType(value).decode('unicode-escape')
        return j, uch
+
# Detect whether this interpreter supports code points above 0xFFFF:
# on narrow Python 2 builds unichr(0x10000) raises ValueError.
wide_build = True
try:
    unichr(0x10000)
except ValueError:
    wide_build = False

# If we are going to do operators then they should generate
#  [ is operator:plus of ( \1 \2 ) ]
+
class BadSyntax(SyntaxError):
    """Raised by the N3 parser on malformed input.

    Stores the document URI, line count, full source text (utf-8 encoded)
    and error offset so __str__ can render a caret-marked excerpt.
    """

    def __init__(self, uri, lines, str, i, why):
        self._str = str.encode('utf-8') # Better go back to strings for errors
        self._i = i
        self._why = why
        self.lines = lines
        self._uri = uri

    def __str__(self):
        text = self._str
        pos = self._i
        # Show at most 60 characters of context on either side of the error.
        start = pos - 60 if pos > 60 else 0
        prefix = "..." if pos > 60 else ""
        suffix = "..." if len(text) - pos > 60 else ""

        return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
               % (self.lines + 1, self._uri, self._why, prefix,
                  text[start:pos], text[pos:pos + 60], suffix)
+
+
+
def stripCR(str):
    """Return *str* with every carriage-return character removed."""
    return "".join(ch for ch in str if ch != "\r")
+
def dummyWrite(x):
    """No-op sink: accept *x* and discard it."""
    return None
+
+################################################################################
+
+
def toBool(s):
    """Map 'true'/'True'/'1' to True and 'false'/'False'/'0' to False.

    Any other value raises ValueError.
    """
    if s in ('true', 'True', '1'):
        return True
    if s in ('false', 'False', '0'):
        return False
    raise ValueError(s)
+
+
+
+
+
class Formula(object):
    """A quoted N3 formula ({...}) backed by an rdflib QuotedGraph.

    Each instance takes a unique number from the class-level counter and
    tracks the existential/universal variables declared inside it.
    """
    number = 0  # class-wide counter used to number formulae

    def __init__(self, parent):
        self.counter = 0  # per-formula blank-node counter
        Formula.number += 1
        self.number = Formula.number
        self.existentials = {}
        self.universals = {}

        self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id())

    def __str__(self):
        return '_:Formula%s' % self.number

    def id(self):
        """Return the BNode identifying this formula."""
        return BNode('_:Formula%s' % self.number)

    def newBlankNode(self, uri=None, why=None):
        """Return a fresh BNode; derive the label from *uri* when given."""
        if uri is None:
            self.counter += 1
            b = BNode('f%sb%s' % (id(self), self.counter))
        else: b = BNode(uri.split('#').pop().replace('_', 'b'))
        return b

    def newUniversal(self, uri, why=None):
        """Return a Variable named after the fragment of *uri*."""
        return Variable(uri.split('#').pop())

    def declareExistential(self, x):
        # Map the declared symbol to a fresh blank node; RDFSink.normalise
        # substitutes it when statements are emitted.
        self.existentials[x] = self.newBlankNode()

    def close(self):
        """Finish the formula and return its QuotedGraph."""
        return self.quotedgraph
+
# Matches single high (non-ASCII) bytes; only referenced by the
# commented-out alternative below in the visible code.
r_hibyte = re.compile(r'([\x80-\xff])')

def iri(uri):
    # Decode a utf-8 byte string into a unicode IRI (Python 2 semantics).
    return uri.decode('utf-8')
    # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri))
+
class RDFSink(object):
    """Sink receiving SinkParser events and writing triples into an rdflib
    graph; statements made inside a quoted formula go to that formula's
    QuotedGraph instead of the main graph."""

    def __init__(self, graph):
        self.rootFormula = None  # set by startDoc(); the outermost formula
        self.counter = 0         # counter for generated blank-node labels
        self.graph=graph


    def newFormula(self):
        assert self.graph.store.formula_aware
        f = Formula(self.graph)
        return f

    def newSymbol(self, *args):
        # Round-trip through utf-8 (Python 2 str <-> unicode) to build a URIRef.
        uri = args[0].encode('utf-8')
        return URIRef(iri(uri))

    def newBlankNode(self, arg=None, **kargs):
        """Return a BNode; delegate to the formula when one is given,
        otherwise derive the label from *arg* or the running counter."""
        if isinstance(arg, Formula):
            return arg.newBlankNode()
        elif arg is None:
            self.counter += 1
            b = BNode('n' + str(self.counter))
        else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
        return b

    def newLiteral(self, s, dt, lang):
        # rdflib Literal allows only one of datatype/lang; datatype wins here.
        if dt: return Literal(s, datatype=dt)
        else: return Literal(s, lang=lang)

    def newList(self, n, f):
        """Recursively build an rdf:first/rdf:rest chain for list *n*
        in formula *f*; an empty list is rdf:nil."""
        if not n:
            return self.newSymbol(
                'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
            )

        a = self.newBlankNode(f)
        first = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
        )
        rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
        self.makeStatement((f, first, a, n[0]))
        self.makeStatement((f, rest, a, self.newList(n[1:], f)))
        return a

    def newSet(self, *args):
        return set(args)

    def setDefaultNamespace(self, *args):
        return ':'.join(repr(n) for n in args)

    def makeStatement(self, quadruple, why=None):
        # Quad order from the parser is (formula, predicate, subject, object).
        f, p, s, o = quadruple

        if hasattr(p, 'formula'):
            raise Exception("Formula used as predicate")

        s = self.normalise(f, s)
        p = self.normalise(f, p)
        o = self.normalise(f, o)

        if f == self.rootFormula:
            # print s, p, o, '.'
            self.graph.add((s, p, o))
        else:
            # Quoted statement: goes into the formula's own graph.
            f.quotedgraph.add((s,p,o))

        #return str(quadruple)

    def normalise(self, f, n):
        """Convert parser-level values (URI tuples, bools, numbers) into
        rdflib terms, substituting declared existentials of formula *f*."""
        if isinstance(n, tuple):
            return URIRef(unicode(n[1]))  # Python 2 unicode()

        # if isinstance(n, list):
        #     rdflist, f = n
        #     name = self.newBlankNode()
        #     if f == self.rootFormula:
        #         sublist = name
        #         for i in xrange(0, len(rdflist) - 1):
        #             print sublist, 'first', rdflist[i]
        #             rest = self.newBlankNode()
        #             print sublist, 'rest', rest
        #             sublist = rest
        #         print sublist, 'first', rdflist[-1]
        #         print sublist, 'rest', 'nil'
        #     return name

        if isinstance(n, bool):
            s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
            return s

        if isinstance(n, int) or isinstance(n, long):  # Python 2 long
            s = Literal(unicode(n), datatype=INTEGER_DATATYPE)
            return s

        if isinstance(n, Decimal):
            value = str(n.normalize())
            if value == '-0':
                value = '0'  # normalize() can yield '-0'; canonicalise it
            s = Literal(value, datatype=DECIMAL_DATATYPE )
            return s

        if isinstance(n, float):
            s = Literal(str(n), datatype=DOUBLE_DATATYPE )
            return s

        if f.existentials.has_key(n):  # Python 2 dict.has_key
            return f.existentials[n]

        # if isinstance(n, Var):
        #     if f.universals.has_key(n):
        #         return f.universals[n]
        #     f.universals[n] = f.newBlankNode()
        #     return f.universals[n]

        return n

    def intern(self, something):
        return something

    def bind(self, pfx, uri):
        pass # print pfx, ':', uri

    def startDoc(self, formula):
        # Remember the root formula so makeStatement can route to self.graph.
        self.rootFormula = formula

    def endDoc(self, formula):
        pass
+
+
+###################################################
+#
+# Utilities
+#
+
# Map of N3 backslash-escape letters to the characters they denote.
Escapes = {'a':  '\a',
           'b':  '\b',
           'f':  '\f',
           'r':  '\r',
           't':  '\t',
           'v':  '\v',
           'n':  '\n',
           '\\': '\\',
           '"':  '"'}

# Characters that must be escaped when serialising a string literal:
# forbidden1 for """...""" literals (raw tab/newline allowed), forbidden2
# for "..." literals. ur'' raw-unicode literals are Python 2 syntax.
forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]')
forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]')
#"
+#"
def stringToN3(str, singleLine=0, flags=""):
    """Serialise *str* as an N3/N-Triples string literal, delimiters included.

    Long strings containing a newline or quote (and not forced to a
    single line) use the triple-quote delimiter, which permits raw tabs.
    With 'e' in *flags*, escapable characters outside the named escapes
    are emitted as \\uXXXX; code points above the BMP always become
    \\UXXXXXXXX.
    """
    res = ''
    # `!=` here replaces the legacy `<>` operator, which was removed in
    # Python 3; behaviour is identical.
    if (len(str) > 20 and
        str[-1] != '"' and
        not singleLine and
        (str.find("\n") >=0
         or str.find('"') >=0)):
        delim= '"""'
        forbidden = forbidden1   # (allow tabs too now)
    else:
        delim = '"'
        forbidden = forbidden2

    i = 0

    while i < len(str):
        m = forbidden.search(str, i)
        if not m:
            break

        j = m.start()
        res = res + str[i:j]
        ch = m.group(0)
        if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #"
            # Lone quote inside a long literal needs no escaping.
            res = res + ch
        else:
            k = '\a\b\f\r\t\v\n\\"'.find(ch)
            if k >= 0:
                res = res + "\\" + 'abfrtvn\\"'[k]
            else:
                if 'e' in flags:
#                    res = res + ('\\u%04x' % ord(ch))
                    res = res + ('\\u%04X' % ord(ch))
                    # http://www.w3.org/TR/rdf-testcases/#ntriples
                else:
                    res = res + ch
        i = j + 1

    # The following code fixes things for really high range Unicode
    newstr = ""
    for ch in res + str[i:]:
        if ord(ch)>65535:
            newstr = newstr + ('\\U%08X' % ord(ch))
            # http://www.w3.org/TR/rdf-testcases/#ntriples
        else:
            newstr = newstr + ch

    return delim + newstr + delim
+
def backslashUify(ustr):
    """Use URL encoding to return an ASCII string corresponding
    to the given unicode"""
#    progress("String is "+`ustr`)
#    s1=ustr.encode('utf-8')
    s = ""
    for ch in ustr:  # .encode('utf-8'):
        if ord(ch) > 65535:
            ch = "\\U%08X" % ord(ch)  # beyond the BMP: 8-digit escape
        elif ord(ch) > 126:
            ch = "\\u%04X" % ord(ch)  # non-ASCII: 4-digit escape
        else:
            ch = "%c" % ord(ch)
        s = s + ch
    # b() — presumably the py3compat byte-string helper; defined elsewhere.
    return b(s)
+
@py3compat.format_doctest_out
def hexify(ustr):
    """Use URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    """ #"
#    progress("String is "+`ustr`)
#    s1=ustr.encode('utf-8')
    s = ""
    for ch in ustr:  # .encode('utf-8'):
        if ord(ch) > 126 or ord(ch) < 33 :
            ch = "%%%02X" % ord(ch)  # percent-encode controls, space, non-ASCII
        else:
            ch = "%c" % ord(ch)
        s = s + ch
    return b(s)
+
def dummy():
    # NOTE(review): dead/broken code — this reads the *builtin* `str` as if
    # it were an input string (len(str) would raise TypeError). Kept verbatim;
    # appears to be a leftover precursor of stringToN3 above.
    res = ""
    if len(str) > 20 and (str.find("\n") >=0
                          or str.find('"') >=0):
        delim= '"""'
        forbidden = "\\\"\a\b\f\r\v"  # (allow tabs too now)
    else:
        delim = '"'
        forbidden = "\\\"\a\b\f\r\v\t\n"
    for i in range(len(str)):
        ch = str[i]
        j = forbidden.find(ch)
        if ch == '"' and delim == '"""' \
           and i+1 < len(str) and str[i+1] != '"':
            j=-1  # Single quotes don't need escaping in long format
        if j>=0: ch = "\\" + '\\"abfrvtn'[j]
        elif ch not in "\n\t" and (ch < " " or ch > "}"):
            ch = "[[" + `ch` + "]]" #[2:-1] # Use python
        res = res + ch
    return delim + res + delim
+
+
class N3Parser(Parser):
    """rdflib Parser plugin: feeds an N3 input source through SinkParser
    into a ConjunctiveGraph wrapped around the target graph's store."""

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        # we're currently being handed a Graph, not a ConjunctiveGraph
        assert graph.store.context_aware # is this implied by formula_aware
        assert graph.store.formula_aware

        if encoding not in [None, "utf-8"]:
            raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)

        conj_graph = ConjunctiveGraph(store=graph.store)
        conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg
        # TODO: update N3Processor so that it can use conj_graph as the sink
        conj_graph.namespace_manager = graph.namespace_manager
        sink = RDFSink(conj_graph)

        # Base URI resolved against the input source's public/system id.
        baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
        p = SinkParser(sink, baseURI=baseURI)

        p.loadStream(source.getByteStream())

        # Copy the prefixes discovered while parsing into the graph.
        for prefix, namespace in p._bindings.items():
            conj_graph.bind(prefix, namespace)
+
+
+
+
def _test():
    # Run this module's doctests (not wired up: the guard below is commented out).
    import doctest
    doctest.testmod()


# if __name__ == '__main__':
#     _test()
+
def main():
    """Command-line entry point: parse the N3 file named in sys.argv[1]
    into a ConjunctiveGraph and print every resulting quad.

    Uses the Python 2 print statement, so this module is py2-only.
    """
    g=ConjunctiveGraph()

    sink = RDFSink(g)
    base = 'file://' + os.path.join(os.getcwd(), sys.argv[1])

    p = SinkParser(sink, baseURI=base)
    p._bindings[''] = p._baseURI + '#'  # default namespace: base URI + '#'
    p.startDoc()

    f = open(sys.argv[1], 'rb')
    bytes = f.read()
    f.close()

    p.feed(bytes)
    p.endDoc()
    for t in g.quads((None,None,None)):

        print t

if __name__ == '__main__':
    main()
+
+#ends
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py
new file mode 100644
index 0000000..22d65f8
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py
@@ -0,0 +1,107 @@
+"""
+This is a rdflib_ plugin for parsing NQuad files into Conjunctive
+graphs that can be used and queried. The store that backs the graph
+*must* be able to handle contexts.
+
+>>> from rdflib_ import ConjunctiveGraph, URIRef, Namespace
+>>> g = ConjunctiveGraph()
+>>> data = open("test/example.nquads", "rb")
+>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS
+<Graph identifier=... (<class 'rdflib_.graph.Graph'>)>
+>>> assert len(g.store) == 449
+>>> # There should be 16 separate contexts
+>>> assert len([x for x in g.store.contexts()]) == 16
+>>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009)
+>>> # Looking for:
+>>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009>
+>>> s = URIRef("http://bibliographica.org/entity/E10009")
+>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+>>> assert(g.value(s, FOAF.name) == "Arco Publications")
+"""
+
+from rdflib_.py3compat import b
+
+# Build up from the NTriples parser:
+from rdflib_.plugins.parsers.ntriples import NTriplesParser
+from rdflib_.plugins.parsers.ntriples import ParseError
+from rdflib_.plugins.parsers.ntriples import r_tail
+from rdflib_.plugins.parsers.ntriples import r_wspace
+from rdflib_.plugins.parsers.ntriples import r_wspaces
+
+__all__ = ['QuadSink', 'NQuadsParser']
+
class QuadSink(object):
    """Fallback sink used when no context-aware store is supplied:
    buffers parsed quads in memory behind a minimal store facade."""

    def __init__(self):
        class FakeStore(object):
            # Minimal stand-in exposing only the addN hook the parser uses.
            def __init__(self, addn):
                self.addN = addn
        self.length = 0
        self.__quads = []
        self.__store = FakeStore(self.addN)

    def addN(self, quads):
        # Despite the plural name, each call stores one 4-tuple
        # (see quads() below, which unpacks each entry as s,p,o,ctx);
        # length counts calls.
        self.length += 1
        self.__quads.append(quads)

    def quads(self, (s,p,o)):  # Python 2 tuple-parameter syntax
        for s,p,o,ctx in self.__quads:
            yield s,p,o,ctx
+
class NQuadsParser(NTriplesParser):
    """N-Quads parser: N-Triples lines extended with a fourth (context)
    URI, added to a context-aware store."""

    def __init__(self, sink=None):
        if sink is not None:
            assert sink.store.context_aware, ("NQuadsParser must be given"
                                              " a context aware store.")
            self.sink = sink
        else: self.sink = QuadSink()

    def parse(self, inputsource, sink, **kwargs):
        """Parse f as an N-Triples file."""
        assert sink.store.context_aware, ("NQuadsParser must be given"
                                          " a context aware store.")
        self.sink = sink

        source = inputsource.getByteStream()

        if not hasattr(source, 'read'):
            raise ParseError("Item to parse must be a file-like object.")

        self.file = source
        self.buffer = ''
        while True:
            self.line = self.readline()
            if self.line is None: break
            try: self.parseline()
            except ParseError:
                raise ParseError("Invalid line: %r" % self.line)
        return self.sink

    def context(self):
        # The fourth term must parse as a URI reference.
        context = self.uriref()
        if not context:
            raise ParseError("Context must be a uriref")
        return context

    def parseline(self):
        """Parse one line: subject, predicate, object, context, '.'"""
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith(b('#')):
            return # The line is empty or a comment

        subject = self.subject()
        self.eat(r_wspaces)

        predicate = self.predicate()
        self.eat(r_wspaces)

        obj = self.object()
        self.eat(r_wspaces)

        context = self.context()
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        self.sink.store.add((subject, predicate, obj), context)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~
new file mode 100644
index 0000000..fbb4a37
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~
@@ -0,0 +1,107 @@
+"""
+This is a rdflib plugin for parsing NQuad files into Conjunctive
+graphs that can be used and queried. The store that backs the graph
+*must* be able to handle contexts.
+
+>>> from rdflib import ConjunctiveGraph, URIRef, Namespace
+>>> g = ConjunctiveGraph()
+>>> data = open("test/example.nquads", "rb")
+>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS
+<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+>>> assert len(g.store) == 449
+>>> # There should be 16 separate contexts
+>>> assert len([x for x in g.store.contexts()]) == 16
+>>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009)
+>>> # Looking for:
+>>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009>
+>>> s = URIRef("http://bibliographica.org/entity/E10009")
+>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+>>> assert(g.value(s, FOAF.name) == "Arco Publications")
+"""
+
+from rdflib.py3compat import b
+
+# Build up from the NTriples parser:
+from rdflib.plugins.parsers.ntriples import NTriplesParser
+from rdflib.plugins.parsers.ntriples import ParseError
+from rdflib.plugins.parsers.ntriples import r_tail
+from rdflib.plugins.parsers.ntriples import r_wspace
+from rdflib.plugins.parsers.ntriples import r_wspaces
+
+__all__ = ['QuadSink', 'NQuadsParser']
+
+class QuadSink(object):
+ def __init__(self):
+ class FakeStore(object):
+ def __init__(self, addn):
+ self.addN = addn
+ self.length = 0
+ self.__quads = []
+ self.__store = FakeStore(self.addN)
+
+ def addN(self, quads):
+ self.length += 1
+ self.__quads.append(quads)
+
+ def quads(self, (s,p,o)):
+ for s,p,o,ctx in self.__quads:
+ yield s,p,o,ctx
+
+class NQuadsParser(NTriplesParser):
+ def __init__(self, sink=None):
+ if sink is not None:
+ assert sink.store.context_aware, ("NQuadsParser must be given"
+ " a context aware store.")
+ self.sink = sink
+ else: self.sink = QuadSink()
+
+ def parse(self, inputsource, sink, **kwargs):
+ """Parse f as an N-Quads file."""
+ assert sink.store.context_aware, ("NQuadsParser must be given"
+ " a context aware store.")
+ self.sink = sink
+
+ source = inputsource.getByteStream()
+
+ if not hasattr(source, 'read'):
+ raise ParseError("Item to parse must be a file-like object.")
+
+ self.file = source
+ self.buffer = ''
+ while True:
+ self.line = self.readline()
+ if self.line is None: break
+ try: self.parseline()
+ except ParseError:
+ raise ParseError("Invalid line: %r" % self.line)
+ return self.sink
+
+ def context(self):
+ context = self.uriref()
+ if not context:
+ raise ParseError("Context must be a uriref")
+ return context
+
+ def parseline(self):
+ self.eat(r_wspace)
+ if (not self.line) or self.line.startswith(b('#')):
+ return # The line is empty or a comment
+
+ subject = self.subject()
+ self.eat(r_wspaces)
+
+ predicate = self.predicate()
+ self.eat(r_wspaces)
+
+ obj = self.object()
+ self.eat(r_wspaces)
+
+ context = self.context()
+ self.eat(r_tail)
+
+ if self.line:
+ raise ParseError("Trailing garbage")
+ # Must have a context aware store - add on a normal Graph
+ # discards anything where the ctx != graph.identifier
+ self.sink.store.add((subject, predicate, obj), context)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py
new file mode 100644
index 0000000..86cec18
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py
@@ -0,0 +1,28 @@
+from rdflib_.parser import Parser
+from rdflib_.plugins.parsers.ntriples import NTriplesParser
+
+__all__ = ['NTSink', 'NTParser']
+
+class NTSink(object):
+ def __init__(self, graph):
+ self.graph = graph
+
+ def triple(self, s, p, o):
+ self.graph.add((s, p, o))
+
+
+class NTParser(Parser):
+ """parser for the ntriples format, often stored with the .nt extension
+
+ See http://www.w3.org/TR/rdf-testcases/#ntriples"""
+
+ def __init__(self):
+ super(NTParser, self).__init__()
+
+ def parse(self, source, sink, baseURI=None):
+ f = source.getByteStream() # TODO getCharacterStream?
+ parser = NTriplesParser(NTSink(sink))
+ parser.parse(f)
+ f.close()
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~
new file mode 100644
index 0000000..1ec2282
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~
@@ -0,0 +1,28 @@
+from rdflib.parser import Parser
+from rdflib.plugins.parsers.ntriples import NTriplesParser
+
+__all__ = ['NTSink', 'NTParser']
+
+class NTSink(object):
+ def __init__(self, graph):
+ self.graph = graph
+
+ def triple(self, s, p, o):
+ self.graph.add((s, p, o))
+
+
+class NTParser(Parser):
+ """parser for the ntriples format, often stored with the .nt extension
+
+ See http://www.w3.org/TR/rdf-testcases/#ntriples"""
+
+ def __init__(self):
+ super(NTParser, self).__init__()
+
+ def parse(self, source, sink, baseURI=None):
+ f = source.getByteStream() # TODO getCharacterStream?
+ parser = NTriplesParser(NTSink(sink))
+ parser.parse(f)
+ f.close()
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py
new file mode 100644
index 0000000..36a263e
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+__doc__="""
+N-Triples Parser
+License: GPL 2, W3C, BSD, or MIT
+Author: Sean B. Palmer, inamidst.com
+"""
+
+import re
+from rdflib_.term import URIRef as URI
+from rdflib_.term import BNode as bNode
+from rdflib_.term import Literal
+
+from rdflib_.py3compat import b, cast_bytes
+
+__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser']
+
+uriref = b(r'<([^:]+:[^\s"<>]+)>')
+literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
+litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?')
+
+r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)'))
+r_wspace = re.compile(b(r'[ \t]*'))
+r_wspaces = re.compile(b(r'[ \t]+'))
+r_tail = re.compile(b(r'[ \t]*\.[ \t]*'))
+r_uriref = re.compile(uriref)
+r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)'))
+r_literal = re.compile(literal + litinfo)
+
+bufsiz = 2048
+validate = False
+
+class Node(unicode): pass
+
+class ParseError(Exception): pass
+
+class Sink(object):
+ def __init__(self):
+ self.length = 0
+
+ def triple(self, s, p, o):
+ self.length += 1
+ print (s, p, o)
+
+quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'}
+r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)'))
+r_quot = re.compile(b(r'\\(t|n|r|"|\\)'))
+r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})'))
+
+def unquote(s):
+ """Unquote an N-Triples string."""
+ if not validate:
+ return s.decode('unicode-escape')
+ else:
+ result = []
+ while s:
+ m = r_safe.match(s)
+ if m:
+ s = s[m.end():]
+ result.append(m.group(1).decode('ascii'))
+ continue
+
+ m = r_quot.match(s)
+ if m:
+ s = s[2:]
+ result.append(quot[m.group(1)])
+ continue
+
+ m = r_uniquot.match(s)
+ if m:
+ s = s[m.end():]
+ u, U = m.groups()
+ codepoint = int(u or U, 16)
+ if codepoint > 0x10FFFF:
+ raise ParseError("Disallowed codepoint: %08X" % codepoint)
+ result.append(unichr(codepoint))
+ elif s.startswith(b('\\')):
+ raise ParseError("Illegal escape at: %s..." % s[:10])
+ else: raise ParseError("Illegal literal character: %r" % s[0])
+ return u''.join(result)
+
+r_hibyte = re.compile(ur'([\x80-\xFF])')
+
+def uriquote(uri):
+ if not validate:
+ return uri
+ else:
+ return r_hibyte.sub(
+ lambda m: '%%%02X' % ord(m.group(1)), uri)
+
+class NTriplesParser(object):
+ """An N-Triples Parser.
+
+ Usage::
+
+ p = NTriplesParser(sink=MySink())
+ sink = p.parse(f) # file; use parsestring for a string
+ """
+
+ def __init__(self, sink=None):
+ if sink is not None:
+ self.sink = sink
+ else: self.sink = Sink()
+
+ def parse(self, f):
+ """Parse f as an N-Triples file."""
+ if not hasattr(f, 'read'):
+ raise ParseError("Item to parse must be a file-like object.")
+
+ self.file = f
+ self.buffer = ''
+ while True:
+ self.line = self.readline()
+ if self.line is None: break
+ try: self.parseline()
+ except ParseError:
+ raise ParseError("Invalid line: %r" % self.line)
+ return self.sink
+
+ def parsestring(self, s):
+ """Parse s as an N-Triples string."""
+ if not isinstance(s, basestring):
+ raise ParseError("Item to parse must be a string instance.")
+ try:
+ from io import BytesIO
+ except ImportError:
+ from cStringIO import StringIO as BytesIO
+ f = BytesIO()
+ f.write(cast_bytes(s))
+ f.seek(0)
+ self.parse(f)
+
+ def readline(self):
+ """Read an N-Triples line from buffered input."""
+ # N-Triples lines end in either CRLF, CR, or LF
+ # Therefore, we can't just use f.readline()
+ if not self.buffer:
+ buffer = self.file.read(bufsiz)
+ if not buffer: return None
+ self.buffer = buffer
+
+ while True:
+ m = r_line.match(self.buffer)
+ if m: # the more likely prospect
+ self.buffer = self.buffer[m.end():]
+ return m.group(1)
+ else:
+ buffer = self.file.read(bufsiz)
+ if not buffer and not self.buffer.isspace():
+ raise ParseError("EOF in line")
+ elif not buffer:
+ return None
+ self.buffer += buffer
+
+ def parseline(self):
+ self.eat(r_wspace)
+ if (not self.line) or self.line.startswith(b('#')):
+ return # The line is empty or a comment
+
+ subject = self.subject()
+ self.eat(r_wspaces)
+
+ predicate = self.predicate()
+ self.eat(r_wspaces)
+
+ object = self.object()
+ self.eat(r_tail)
+
+ if self.line:
+ raise ParseError("Trailing garbage")
+ self.sink.triple(subject, predicate, object)
+
+ def peek(self, token):
+ return self.line.startswith(token)
+
+ def eat(self, pattern):
+ m = pattern.match(self.line)
+ if not m: # @@ Why can't we get the original pattern?
+ print(dir(pattern))
+ print repr(self.line), type(self.line)
+ raise ParseError("Failed to eat %s" % pattern)
+ self.line = self.line[m.end():]
+ return m
+
+ def subject(self):
+ # @@ Consider using dictionary cases
+ subj = self.uriref() or self.nodeid()
+ if not subj:
+ raise ParseError("Subject must be uriref or nodeID")
+ return subj
+
+ def predicate(self):
+ pred = self.uriref()
+ if not pred:
+ raise ParseError("Predicate must be uriref")
+ return pred
+
+ def object(self):
+ objt = self.uriref() or self.nodeid() or self.literal()
+ if objt is False:
+ raise ParseError("Unrecognised object type")
+ return objt
+
+ def uriref(self):
+ if self.peek(b('<')):
+ uri = self.eat(r_uriref).group(1)
+ uri = unquote(uri)
+ uri = uriquote(uri)
+ return URI(uri)
+ return False
+
+ def nodeid(self):
+ if self.peek(b('_')):
+ return bNode(self.eat(r_nodeid).group(1).decode())
+ return False
+
+ def literal(self):
+ if self.peek(b('"')):
+ lit, lang, dtype = self.eat(r_literal).groups()
+ if lang:
+ lang = lang.decode()
+ else:
+ lang = None
+ if dtype:
+ dtype = dtype.decode()
+ else:
+ dtype = None
+ if lang and dtype:
+ raise ParseError("Can't have both a language and a datatype")
+ lit = unquote(lit)
+ return Literal(lit, lang, dtype)
+ return False
+
+# # Obsolete, unused
+# def parseURI(uri):
+# import urllib
+# parser = NTriplesParser()
+# u = urllib.urlopen(uri)
+# sink = parser.parse(u)
+# u.close()
+# # for triple in sink:
+# # print triple
+# print 'Length of input:', sink.length
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~
new file mode 100644
index 0000000..48fe327
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+__doc__="""
+N-Triples Parser
+License: GPL 2, W3C, BSD, or MIT
+Author: Sean B. Palmer, inamidst.com
+"""
+
+import re
+from rdflib.term import URIRef as URI
+from rdflib.term import BNode as bNode
+from rdflib.term import Literal
+
+from rdflib.py3compat import b, cast_bytes
+
+__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser']
+
+uriref = b(r'<([^:]+:[^\s"<>]+)>')
+literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
+litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?')
+
+r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)'))
+r_wspace = re.compile(b(r'[ \t]*'))
+r_wspaces = re.compile(b(r'[ \t]+'))
+r_tail = re.compile(b(r'[ \t]*\.[ \t]*'))
+r_uriref = re.compile(uriref)
+r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)'))
+r_literal = re.compile(literal + litinfo)
+
+bufsiz = 2048
+validate = False
+
+class Node(unicode): pass
+
+class ParseError(Exception): pass
+
+class Sink(object):
+ def __init__(self):
+ self.length = 0
+
+ def triple(self, s, p, o):
+ self.length += 1
+ print (s, p, o)
+
+quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'}
+r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)'))
+r_quot = re.compile(b(r'\\(t|n|r|"|\\)'))
+r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})'))
+
+def unquote(s):
+ """Unquote an N-Triples string."""
+ if not validate:
+ return s.decode('unicode-escape')
+ else:
+ result = []
+ while s:
+ m = r_safe.match(s)
+ if m:
+ s = s[m.end():]
+ result.append(m.group(1).decode('ascii'))
+ continue
+
+ m = r_quot.match(s)
+ if m:
+ s = s[2:]
+ result.append(quot[m.group(1)])
+ continue
+
+ m = r_uniquot.match(s)
+ if m:
+ s = s[m.end():]
+ u, U = m.groups()
+ codepoint = int(u or U, 16)
+ if codepoint > 0x10FFFF:
+ raise ParseError("Disallowed codepoint: %08X" % codepoint)
+ result.append(unichr(codepoint))
+ elif s.startswith(b('\\')):
+ raise ParseError("Illegal escape at: %s..." % s[:10])
+ else: raise ParseError("Illegal literal character: %r" % s[0])
+ return u''.join(result)
+
+r_hibyte = re.compile(ur'([\x80-\xFF])')
+
+def uriquote(uri):
+ if not validate:
+ return uri
+ else:
+ return r_hibyte.sub(
+ lambda m: '%%%02X' % ord(m.group(1)), uri)
+
+class NTriplesParser(object):
+ """An N-Triples Parser.
+
+ Usage::
+
+ p = NTriplesParser(sink=MySink())
+ sink = p.parse(f) # file; use parsestring for a string
+ """
+
+ def __init__(self, sink=None):
+ if sink is not None:
+ self.sink = sink
+ else: self.sink = Sink()
+
+ def parse(self, f):
+ """Parse f as an N-Triples file."""
+ if not hasattr(f, 'read'):
+ raise ParseError("Item to parse must be a file-like object.")
+
+ self.file = f
+ self.buffer = ''
+ while True:
+ self.line = self.readline()
+ if self.line is None: break
+ try: self.parseline()
+ except ParseError:
+ raise ParseError("Invalid line: %r" % self.line)
+ return self.sink
+
+ def parsestring(self, s):
+ """Parse s as an N-Triples string."""
+ if not isinstance(s, basestring):
+ raise ParseError("Item to parse must be a string instance.")
+ try:
+ from io import BytesIO
+ except ImportError:
+ from cStringIO import StringIO as BytesIO
+ f = BytesIO()
+ f.write(cast_bytes(s))
+ f.seek(0)
+ self.parse(f)
+
+ def readline(self):
+ """Read an N-Triples line from buffered input."""
+ # N-Triples lines end in either CRLF, CR, or LF
+ # Therefore, we can't just use f.readline()
+ if not self.buffer:
+ buffer = self.file.read(bufsiz)
+ if not buffer: return None
+ self.buffer = buffer
+
+ while True:
+ m = r_line.match(self.buffer)
+ if m: # the more likely prospect
+ self.buffer = self.buffer[m.end():]
+ return m.group(1)
+ else:
+ buffer = self.file.read(bufsiz)
+ if not buffer and not self.buffer.isspace():
+ raise ParseError("EOF in line")
+ elif not buffer:
+ return None
+ self.buffer += buffer
+
+ def parseline(self):
+ self.eat(r_wspace)
+ if (not self.line) or self.line.startswith(b('#')):
+ return # The line is empty or a comment
+
+ subject = self.subject()
+ self.eat(r_wspaces)
+
+ predicate = self.predicate()
+ self.eat(r_wspaces)
+
+ object = self.object()
+ self.eat(r_tail)
+
+ if self.line:
+ raise ParseError("Trailing garbage")
+ self.sink.triple(subject, predicate, object)
+
+ def peek(self, token):
+ return self.line.startswith(token)
+
+ def eat(self, pattern):
+ m = pattern.match(self.line)
+ if not m: # @@ Why can't we get the original pattern?
+ print(dir(pattern))
+ print repr(self.line), type(self.line)
+ raise ParseError("Failed to eat %s" % pattern)
+ self.line = self.line[m.end():]
+ return m
+
+ def subject(self):
+ # @@ Consider using dictionary cases
+ subj = self.uriref() or self.nodeid()
+ if not subj:
+ raise ParseError("Subject must be uriref or nodeID")
+ return subj
+
+ def predicate(self):
+ pred = self.uriref()
+ if not pred:
+ raise ParseError("Predicate must be uriref")
+ return pred
+
+ def object(self):
+ objt = self.uriref() or self.nodeid() or self.literal()
+ if objt is False:
+ raise ParseError("Unrecognised object type")
+ return objt
+
+ def uriref(self):
+ if self.peek(b('<')):
+ uri = self.eat(r_uriref).group(1)
+ uri = unquote(uri)
+ uri = uriquote(uri)
+ return URI(uri)
+ return False
+
+ def nodeid(self):
+ if self.peek(b('_')):
+ return bNode(self.eat(r_nodeid).group(1).decode())
+ return False
+
+ def literal(self):
+ if self.peek(b('"')):
+ lit, lang, dtype = self.eat(r_literal).groups()
+ if lang:
+ lang = lang.decode()
+ else:
+ lang = None
+ if dtype:
+ dtype = dtype.decode()
+ else:
+ dtype = None
+ if lang and dtype:
+ raise ParseError("Can't have both a language and a datatype")
+ lit = unquote(lit)
+ return Literal(lit, lang, dtype)
+ return False
+
+# # Obsolete, unused
+# def parseURI(uri):
+# import urllib
+# parser = NTriplesParser()
+# u = urllib.urlopen(uri)
+# sink = parser.parse(u)
+# u.close()
+# # for triple in sink:
+# # print triple
+# print 'Length of input:', sink.length
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py
new file mode 100644
index 0000000..9c4f4eb
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py
@@ -0,0 +1,168 @@
+"""
+From a Python file, expecting an RDF/XML pretty printed output::
+
+ import rdflib_.graph as g
+ graph = g.Graph()
+ graph.parse('filename.html', format='rdfa')
+ print graph.serialize(format='pretty-xml')
+
+For details on RDFa, the reader should consult the `RDFa syntax document`__.
+
+This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman
+
+.. __: http://www.w3.org/TR/rdfa-syntax
+.. __: http://www.w3.org/2007/08/pyRdfa/
+
+"""
+
+
+import sys
+import urllib
+import xml.dom.minidom
+
+from rdflib_.term import URIRef
+from rdflib_.parser import Parser
+from rdflib_.plugins.parsers.rdfa.state import ExecutionContext
+from rdflib_.plugins.parsers.rdfa.parse import parse_one_node
+from rdflib_.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph,
+ DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA)
+
+from rdflib_.plugins.parsers.rdfa.transform.headabout import head_about_transform
+
+__all__ = ['RDFaParser']
+
+# These are part of the RDFa spec.
+BUILT_IN_TRANSFORMERS = [
+ head_about_transform
+]
+
+# Exception handling. Essentially, all the different exceptions are re-packaged
+# into separate exception class, to allow for an easier management on the user
+# level
+class RDFaError(Exception) :
+ """Just a wrapper around the local exceptions. It does not add any new
+ functionality to the Exception class."""
+ pass
+
+# For some doctype and element name combinations an automatic switch to an
+# input mode is done
+_HOST_LANG = {
+ ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA,
+ ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML
+}
+
+
+class RDFaParser(Parser):
+
+ def parse(self, source, sink,
+ warnings=False, space_preserve=True,
+ transformers=None, xhtml=True, lax=True, html5=False, encoding=None):
+ if transformers is None:
+ transformers = []
+ options = Options(warnings, space_preserve, transformers, xhtml, lax)
+ baseURI = source.getPublicId()
+ stream = source.getByteStream()
+ if html5:
+ dom = _process_html5_source(stream, options, encoding)
+ else:
+ dom = _try_process_source(stream, options, encoding)
+ _process_DOM(dom, baseURI, sink, options)
+
+
+def _process_DOM(dom, base, graph, options=None):
+ """
+ Core processing. The transformers ("pre-processing") are applied to the DOM
+ tree, the state is initialized, and the "real" RDFa parsing is done.
+ The result is put into the provided Graph.
+
+ The real work is done in the parser function ``parse_one_node()``.
+
+ Params:
+ dom -- XML DOM Tree node (for the top level)
+ base -- URI for the default "base" value (usually the URI of the file to be processed)
+
+ Options:
+ obj -- `Options` for the distiller
+ raise RDFaError -- when called via CGI, this encapsulates the possible
+ exceptions raised by the RDFLib serializer or the processing itself
+ """
+ html = dom.documentElement
+ # Perform the built-in and external transformations on the HTML tree. This is,
+ # in simulated form, the hGRDDL approach of Ben Adida.
+ for trans in options.transformers + BUILT_IN_TRANSFORMERS:
+ trans(html, options)
+ # Collect the initial state. This takes care of things
+ # like base, top level namespace settings, etc.
+ # Ensure the proper initialization.
+ state = ExecutionContext(html, graph, base=base, options=options)
+ # The top level subject starts with the current document; this
+ # is used by the recursion
+ subject = URIRef(state.base)
+ # Parse the whole thing recursively and fill the graph.
+ parse_one_node(html, graph, subject, state, [])
+ if options.comment_graph.graph != None:
+ # Add the content of the comment graph to the output.
+ graph.bind("dist", DIST_NS)
+ for t in options.comment_graph.graph:
+ graph.add(t)
+
+def _try_process_source(stream, options, encoding):
+ """
+ Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options
+ while figuring out the input.
+
+ Returns a DOM tree.
+ """
+ parse = xml.dom.minidom.parse
+ try:
+ dom = parse(stream)
+ # Try to second-guess the input type
+ # This is _not_ really kosher, but the minidom is not really namespace aware...
+ # In practice the goal is to have the system recognize svg content automatically
+ # First see if there is a default namespace defined for the document:
+ top = dom.documentElement
+ if top.hasAttribute("xmlns"):
+ key = (top.getAttribute("xmlns"), top.nodeName)
+ if key in _HOST_LANG:
+ options.host_language = _HOST_LANG[key]
+ return dom
+ except:
+ # XML Parsing error in the input
+ type, value, traceback = sys.exc_info()
+ if options.host_language == GENERIC_XML or options.lax == False:
+ raise RDFaError('Parsing error in input file: "%s"' % value)
+
+ # XML Parsing error in the input
+ msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value
+ if options != None and options.warnings:
+ options.comment_graph.add_warning(msg)
+
+ # in Ivan's original code he reopened the stream if it was from urllib
+ if isinstance(stream, urllib.addinfourl):
+ stream = urllib.urlopen(stream.url)
+
+ return _process_html5_source(stream, options, encoding)
+
+
+def _process_html5_source(stream, options, encoding):
+ # Now try to see if an HTML5 parser is an alternative...
+ try:
+ from html5lib import HTMLParser, treebuilders
+ except ImportError:
+ # no alternative to the XHTML error, because HTML5 parser not available...
+ msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>'
+ raise RDFaError(msg2)
+
+ parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
+ parse = parser.parse
+ try:
+ dom = parse(stream, encoding)
+ # The host language has changed
+ options.host_language = HTML5_RDFA
+ except:
+ # Well, even the HTML5 parser could not do anything with this...
+ (type, value, traceback) = sys.exc_info()
+ msg2 = 'Parsing error in input file as HTML5: "%s"' % value
+ raise RDFaError, msg2
+
+ return dom
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~
new file mode 100644
index 0000000..9553349
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~
@@ -0,0 +1,168 @@
+"""
+From a Python file, expecting an RDF/XML pretty printed output::
+
+ import rdflib.graph as g
+ graph = g.Graph()
+ graph.parse('filename.html', format='rdfa')
+ print graph.serialize(format='pretty-xml')
+
+For details on RDFa, the reader should consult the `RDFa syntax document`__.
+
+This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman
+
+.. __: http://www.w3.org/TR/rdfa-syntax
+.. __: http://www.w3.org/2007/08/pyRdfa/
+
+"""
+
+
+import sys
+import urllib
+import xml.dom.minidom
+
+from rdflib.term import URIRef
+from rdflib.parser import Parser
+from rdflib.plugins.parsers.rdfa.state import ExecutionContext
+from rdflib.plugins.parsers.rdfa.parse import parse_one_node
+from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph,
+ DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA)
+
+from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform
+
+__all__ = ['RDFaParser']
+
+# These are part of the RDFa spec.
+BUILT_IN_TRANSFORMERS = [
+ head_about_transform
+]
+
+# Exception handling. Essentially, all the different exceptions are re-packaged
+# into separate exception class, to allow for an easier management on the user
+# level
+class RDFaError(Exception) :
+ """Just a wrapper around the local exceptions. It does not add any new
+ functionality to the Exception class."""
+ pass
+
+# For some doctype and element name combinations an automatic switch to an
+# input mode is done
+_HOST_LANG = {
+ ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA,
+ ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML
+}
+
+
+class RDFaParser(Parser):
+
+ def parse(self, source, sink,
+ warnings=False, space_preserve=True,
+ transformers=None, xhtml=True, lax=True, html5=False, encoding=None):
+ if transformers is None:
+ transformers = []
+ options = Options(warnings, space_preserve, transformers, xhtml, lax)
+ baseURI = source.getPublicId()
+ stream = source.getByteStream()
+ if html5:
+ dom = _process_html5_source(stream, options, encoding)
+ else:
+ dom = _try_process_source(stream, options, encoding)
+ _process_DOM(dom, baseURI, sink, options)
+
+
+def _process_DOM(dom, base, graph, options=None):
+ """
+ Core processing. The transformers ("pre-processing") are applied to the DOM
+ tree, the state is initialized, and the "real" RDFa parsing is done.
+ The result is put into the provided Graph.
+
+ The real work is done in the parser function ``parse_one_node()``.
+
+ Params:
+ dom -- XML DOM Tree node (for the top level)
+ base -- URI for the default "base" value (usually the URI of the file to be processed)
+
+ Options:
+ obj -- `Options` for the distiller
+ raise RDFaError -- when called via CGI, this encapsulates the possible
+ exceptions raised by the RDFLib serializer or the processing itself
+ """
+ html = dom.documentElement
+ # Perform the built-in and external transformations on the HTML tree. This is,
+ # in simulated form, the hGRDDL approach of Ben Adida.
+ for trans in options.transformers + BUILT_IN_TRANSFORMERS:
+ trans(html, options)
+ # Collect the initial state. This takes care of things
+ # like base, top level namespace settings, etc.
+ # Ensure the proper initialization.
+ state = ExecutionContext(html, graph, base=base, options=options)
+ # The top level subject starts with the current document; this
+ # is used by the recursion
+ subject = URIRef(state.base)
+ # Parse the whole thing recursively and fill the graph.
+ parse_one_node(html, graph, subject, state, [])
+ if options.comment_graph.graph != None:
+ # Add the content of the comment graph to the output.
+ graph.bind("dist", DIST_NS)
+ for t in options.comment_graph.graph:
+ graph.add(t)
+
+def _try_process_source(stream, options, encoding):
+ """
+ Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options
+ while figuring out the input.
+
+ Returns a DOM tree.
+ """
+ parse = xml.dom.minidom.parse
+ try:
+ dom = parse(stream)
+ # Try to second-guess the input type
+ # This is _not_ really kosher, but the minidom is not really namespace aware...
+ # In practice the goal is to have the system recognize svg content automatically
+ # First see if there is a default namespace defined for the document:
+ top = dom.documentElement
+ if top.hasAttribute("xmlns"):
+ key = (top.getAttribute("xmlns"), top.nodeName)
+ if key in _HOST_LANG:
+ options.host_language = _HOST_LANG[key]
+ return dom
+ except:
+ # XML Parsing error in the input
+ type, value, traceback = sys.exc_info()
+ if options.host_language == GENERIC_XML or options.lax == False:
+ raise RDFaError('Parsing error in input file: "%s"' % value)
+
+ # XML Parsing error in the input
+ msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value
+ if options != None and options.warnings:
+ options.comment_graph.add_warning(msg)
+
+ # in Ivan's original code he reopened the stream if it was from urllib
+ if isinstance(stream, urllib.addinfourl):
+ stream = urllib.urlopen(stream.url)
+
+ return _process_html5_source(stream, options, encoding)
+
+
+def _process_html5_source(stream, options, encoding):
+ # Now try to see if an HTML5 parser is an alternative...
+ try:
+ from html5lib import HTMLParser, treebuilders
+ except ImportError:
+ # no alternative to the XHTML error, because HTML5 parser not available...
+ msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>'
+ raise RDFaError(msg2)
+
+ parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
+ parse = parser.parse
+ try:
+ dom = parse(stream, encoding)
+ # The host language has changed
+ options.host_language = HTML5_RDFA
+ except:
+ # Well, even the HTML5 parser could not do anything with this...
+ (type, value, traceback) = sys.exc_info()
+ msg2 = 'Parsing error in input file as HTML5: "%s"' % value
+ raise RDFaError, msg2
+
+ return dom
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py
new file mode 100644
index 0000000..4a9b015
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+"""
+Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example
+by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
+
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+@contact: Ivan Herman, ivan@w3.org
+"""
+
+from StringIO import StringIO
+
+__all__ = ['handle_embeddedRDF']
+
+def handle_embeddedRDF(node, graph, state):
+ """
+ Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if a separate
+ base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing.
+ @param node: a DOM node for the top level xml element
+ @param graph: target rdf graph
+ @type graph: RDFLib's Graph object instance
+ @param state: the inherited state (namespaces, lang, etc)
+ @type state: L{State.ExecutionContext}
+ @return: whether RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendants.
+ @rtype: Boolean
+
+ """
+ if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#":
+ node.setAttribute("xml:base",state.base)
+ rdf = StringIO(node.toxml())
+ graph.parse(rdf)
+ return True
+ else:
+ return False
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~
new file mode 100644
index 0000000..4a9b015
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+"""
+Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example
+by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
+
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+@contact: Ivan Herman, ivan@w3.org
+"""
+
+from StringIO import StringIO
+
+__all__ = ['handle_embeddedRDF']
+
+def handle_embeddedRDF(node, graph, state):
+ """
+ Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate
+ base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing.
+ @param node: a DOM node for the top level xml element
+ @param graph: target rdf graph
+ @type graph: RDFLib's Graph object instance
+ @param state: the inherited state (namespaces, lang, etc)
+ @type state: L{State.ExecutionContext}
+ @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents.
+ @rtype: Boolean
+
+ """
+ if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#":
+ node.setAttribute("xml:base",state.base)
+ rdf = StringIO(node.toxml())
+ graph.parse(rdf)
+ return True
+ else:
+ return False
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py
new file mode 100644
index 0000000..ed185af
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+"""
+Implementation of the Literal handling. Details of the algorithm are described on
+U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+@summary: RDFa Literal generation
+@requires: U{RDFLib package<http://rdflib_.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+"""
+
+import re
+from rdflib_.namespace import RDF
+from rdflib_.term import Literal
+
+__all__ = ['generate_literal']
+
+XMLLiteral = RDF.XMLLiteral
+
+
+def __putBackEntities(str):
+ """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string.
+ Used by XML Literal
+ @param str: string to be converted
+ @return: string with entities
+ @rtype: string
+ """
+ return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+#### The real meat...
+def generate_literal(node, graph, subject, state):
+    """Generate the literal for the C{@property}, taking into account datatype, etc.
+ Note: this method is called only if the C{@property} is indeed present, no need to check.
+
+ This method is an encoding of the algorithm documented
+ U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+ The method returns a value whether the literal is a 'normal' literal (regardless of its datatype)
+ or an XML Literal. The return value is True or False, respectively. This value is used to control whether
+    the parser should stop recursion. This also means that if the literal is generated from @content,
+ the return value is False, regardless of the possible @datatype value.
+
+ @param node: DOM element node
+    @param graph: the (RDF) graph to add the properties to
+ @param subject: the RDFLib URIRef serving as a subject for the generated triples
+ @param state: the current state to be used for the CURIE-s
+ @type state: L{State.ExecutionContext}
+ @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value.
+ @rtype: Boolean
+ """
+ def _get_literal(Pnode):
+ """
+ Get (recursively) the full text from a DOM Node.
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ rc = ""
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + node.data
+ elif node.nodeType == node.ELEMENT_NODE:
+ rc = rc + _get_literal(node)
+
+ # The decision of the group in February 2008 is not to normalize the result by default.
+ # This is reflected in the default value of the option
+ if state.options.space_preserve:
+ return rc
+ else:
+ return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getLiteral
+
+ def _get_XML_literal(Pnode):
+ """
+ Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
+ via a C{node.toxml} call of the xml minidom implementation.)
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ def collectPrefixes(prefixes, node):
+ def addPf(prefx, string):
+ pf = string.split(':')[0]
+ if pf != string and pf not in prefx : prefx.append(pf)
+            # end addPf
+
+ # first the local name of the node
+ addPf(prefixes, node.tagName)
+ # get all the attributes and children
+ for child in node.childNodes:
+ if child.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, child)
+ elif child.nodeType == node.ATTRIBUTE_NODE:
+ addPf(prefixes, node.child.name)
+ # end collectPrefixes
+
+ rc = ""
+ prefixes = []
+ for node in Pnode.childNodes:
+ if node.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, node)
+
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + __putBackEntities(node.data)
+ elif node.nodeType == node.ELEMENT_NODE:
+ # Decorate the element with namespaces and lang values
+ for prefix in prefixes:
+ if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix):
+ node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix])
+ # Set the default namespace, if not done (and is available)
+ if not node.getAttribute("xmlns") and state.defaultNS != None:
+ node.setAttribute("xmlns", state.defaultNS)
+ # Get the lang, if necessary
+ if not node.getAttribute("xml:lang") and state.lang != None:
+ node.setAttribute("xml:lang", state.lang)
+ rc = rc + node.toxml()
+ return rc
+ # If XML Literals must be canonicalized for space, then this is the return line:
+ #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getXMLLiteral
+
+ # Most of the times the literal is a 'normal' one, ie, not an XML Literal
+ retval = True
+
+ # Get the Property URI-s
+ props = state.get_resources(node.getAttribute("property"), prop=True)
+
+ # Get, if exists, the value of @datatype, and figure out the language
+ datatype = None
+ dtset = False
+ lang = state.lang
+ if node.hasAttribute("datatype"):
+ dtset = True
+ dt = node.getAttribute("datatype")
+ if dt != "":
+ datatype = state.get_resource(dt)
+ lang = None
+
+ # The simple case: separate @content attribute
+ if node.hasAttribute("content"):
+ val = node.getAttribute("content")
+ object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
+        # The value of datatype has been set, and the keyword parameters take care of the rest
+ else:
+ # see if there *is* a datatype (even if it is empty!)
+ if dtset:
+ # yep. The Literal content is the pure text part of the current element:
+ # We have to check whether the specified datatype is, in fact, and
+ # explicit XML Literal
+ if datatype == XMLLiteral:
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ object = Literal(_get_literal(node), datatype=datatype, lang=lang)
+ else:
+ # no controlling @datatype. We have to see if there is markup in the contained
+ # element
+ if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]:
+ # yep, and XML Literal should be generated
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ val = _get_literal(node)
+ # At this point, there might be entities in the string that are returned as real characters by the dom
+ # implementation. That should be turned back
+ object = Literal(_get_literal(node), lang=lang)
+
+ # NOTE: rdflib_<2.5 didn't equal Literal with lang="", hence this check
+    # probably always passed?
+ # All tests pass with this check removed; going with that..
+ ## The object may be empty, for example in an ill-defined <meta> element...
+ if True:#object != "":
+ for prop in props:
+ graph.add((subject, prop, object))
+
+ return retval
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~
new file mode 100644
index 0000000..2ab9b44
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+"""
+Implementation of the Literal handling. Details of the algorithm are described on
+U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+@summary: RDFa Literal generation
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+"""
+
+import re
+from rdflib.namespace import RDF
+from rdflib.term import Literal
+
+__all__ = ['generate_literal']
+
+XMLLiteral = RDF.XMLLiteral
+
+
+def __putBackEntities(str):
+ """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string.
+ Used by XML Literal
+ @param str: string to be converted
+ @return: string with entities
+ @rtype: string
+ """
+ return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+#### The real meat...
+def generate_literal(node, graph, subject, state):
+ """Generate the literal the C{@property}, taking into account datatype, etc.
+ Note: this method is called only if the C{@property} is indeed present, no need to check.
+
+ This method is an encoding of the algorithm documented
+ U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+ The method returns a value whether the literal is a 'normal' literal (regardless of its datatype)
+ or an XML Literal. The return value is True or False, respectively. This value is used to control whether
+ the parser should stop recursion. This also means that that if the literal is generated from @content,
+ the return value is False, regardless of the possible @datatype value.
+
+ @param node: DOM element node
+ @param graph: the (RDF) graph to add the properies to
+ @param subject: the RDFLib URIRef serving as a subject for the generated triples
+ @param state: the current state to be used for the CURIE-s
+ @type state: L{State.ExecutionContext}
+ @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value.
+ @rtype: Boolean
+ """
+ def _get_literal(Pnode):
+ """
+ Get (recursively) the full text from a DOM Node.
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ rc = ""
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + node.data
+ elif node.nodeType == node.ELEMENT_NODE:
+ rc = rc + _get_literal(node)
+
+ # The decision of the group in February 2008 is not to normalize the result by default.
+ # This is reflected in the default value of the option
+ if state.options.space_preserve:
+ return rc
+ else:
+ return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getLiteral
+
+ def _get_XML_literal(Pnode):
+ """
+ Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
+ via a C{node.toxml} call of the xml minidom implementation.)
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ def collectPrefixes(prefixes, node):
+ def addPf(prefx, string):
+ pf = string.split(':')[0]
+ if pf != string and pf not in prefx : prefx.append(pf)
+ # edn addPf
+
+ # first the local name of the node
+ addPf(prefixes, node.tagName)
+ # get all the attributes and children
+ for child in node.childNodes:
+ if child.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, child)
+ elif child.nodeType == node.ATTRIBUTE_NODE:
+ addPf(prefixes, node.child.name)
+ # end collectPrefixes
+
+ rc = ""
+ prefixes = []
+ for node in Pnode.childNodes:
+ if node.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, node)
+
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + __putBackEntities(node.data)
+ elif node.nodeType == node.ELEMENT_NODE:
+ # Decorate the element with namespaces and lang values
+ for prefix in prefixes:
+ if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix):
+ node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix])
+ # Set the default namespace, if not done (and is available)
+ if not node.getAttribute("xmlns") and state.defaultNS != None:
+ node.setAttribute("xmlns", state.defaultNS)
+ # Get the lang, if necessary
+ if not node.getAttribute("xml:lang") and state.lang != None:
+ node.setAttribute("xml:lang", state.lang)
+ rc = rc + node.toxml()
+ return rc
+ # If XML Literals must be canonicalized for space, then this is the return line:
+ #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getXMLLiteral
+
+ # Most of the times the literal is a 'normal' one, ie, not an XML Literal
+ retval = True
+
+ # Get the Property URI-s
+ props = state.get_resources(node.getAttribute("property"), prop=True)
+
+ # Get, if exists, the value of @datatype, and figure out the language
+ datatype = None
+ dtset = False
+ lang = state.lang
+ if node.hasAttribute("datatype"):
+ dtset = True
+ dt = node.getAttribute("datatype")
+ if dt != "":
+ datatype = state.get_resource(dt)
+ lang = None
+
+ # The simple case: separate @content attribute
+ if node.hasAttribute("content"):
+ val = node.getAttribute("content")
+ object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
+ # The value of datatype has been set, and the keyword paramaters take care of the rest
+ else:
+ # see if there *is* a datatype (even if it is empty!)
+ if dtset:
+ # yep. The Literal content is the pure text part of the current element:
+ # We have to check whether the specified datatype is, in fact, and
+ # explicit XML Literal
+ if datatype == XMLLiteral:
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ object = Literal(_get_literal(node), datatype=datatype, lang=lang)
+ else:
+ # no controlling @datatype. We have to see if there is markup in the contained
+ # element
+ if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]:
+ # yep, and XML Literal should be generated
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ val = _get_literal(node)
+ # At this point, there might be entities in the string that are returned as real characters by the dom
+ # implementation. That should be turned back
+ object = Literal(_get_literal(node), lang=lang)
+
+ # NOTE: rdflib<2.5 didn't equal Literal with lang="", hence this check
+ # proably always passed?
+ # All tests pass with this check removed; going with that..
+ ## The object may be empty, for example in an ill-defined <meta> element...
+ if True:#object != "":
+ for prop in props:
+ graph.add((subject, prop, object))
+
+ return retval
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py
new file mode 100644
index 0000000..05abe3e
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+"""
+
+Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and
+handling of the extra Graph for warnings, informations, errors.
+
+
+@summary: RDFa parser (distiller)
+@requires: U{RDFLib<http://rdflib_.net>}
+@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing; note possible dependencies on Python's version on the project's web site
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+
+"""
+
+import sys
+from rdflib_.graph import Graph
+from rdflib_.term import BNode, Literal, URIRef
+from rdflib_.namespace import Namespace
+
+__all__ = ['CommentGraph', 'Options']
+
+DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller"
+DIST_NS = DIST_URI + '#'
+
+ns_errors = Namespace(DIST_NS)
+distillerURI = URIRef(DIST_URI)
+
+WARNING = 'warning'
+ERROR = 'error'
+INFO = 'info'
+DEBUG = 'debug'
+
+_message_properties = {
+ WARNING: ns_errors["warning"],
+ ERROR: ns_errors["error"],
+ INFO: ns_errors["information"],
+ DEBUG: ns_errors["debug"]
+}
+
+def _add_to_comment_graph(graph, msg, prop, uri):
+ """
+ Add a distiller message to the graph.
+
+ @param graph: RDFLib Graph
+ @param msg: message of an exception
+ @type msg: RDFLIb Literal
+ @param prop: the property to be used
+ @type prop: string, must be one of 'warning', 'error', 'info', 'debug'
+ @param uri: the top URI used to invoke the distiller
+ @type uri: URIRef
+ """
+ bnode = BNode()
+ graph.add((distillerURI, _message_properties[prop], bnode))
+ graph.add((bnode, ns_errors["onURI"], uri))
+ graph.add((bnode, ns_errors["message"], msg))
+
+
+class CommentGraph(object):
+ """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings,
+ error messages, and informational messages.
+ """
+ def __init__(self, warnings = False):
+ """
+ @param warnings: whether a graph should effectively be set up, or whether this
+ should just be an empty shell for the various calls to work (without effect)
+ """
+ if warnings:
+ self.graph = Graph()
+ else:
+ self.graph = None
+ self.accumulated_literals = []
+ self.baseURI = None
+
+ def _add_triple(self, msg, prop):
+ obj = Literal(msg)
+ if self.baseURI == None:
+ self.accumulated_literals.append((obj,prop))
+ elif self.graph != None:
+ _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
+
+ def set_base_URI(self, URI):
+ """Set the base URI for the comment triples.
+
+ Note that this method I{must} be called at some point to complete the triples. Without it the triples
+ added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph.
+
+ @param URI: URIRef for the subject of the comments
+ """
+ self.baseURI = URI
+ if self.graph != None:
+ for obj, prop in self.accumulated_literals:
+ _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
+ self.accumulated_literals = []
+
+ def add_warning(self, txt):
+ """Add a warning. A comment triplet is added to the separate "warning" graph.
+ @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== "
+ """
+ self._add_triple(txt, WARNING)
+
+ def add_info(self, txt):
+ """Add an informational comment. A comment triplet is added to the separate "warning" graph.
+ @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
+ """
+ self._add_triple(txt, INFO)
+
+ def add_error(self, txt):
+ """Add an error comment. A comment triplet is added to the separate "warning" graph.
+ @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
+ """
+ self._add_triple(txt, ERROR)
+
+ def _add_debug(self, txt):
+ self._add_triple(txt, DEBUG)
+
+
+GENERIC_XML = 0
+XHTML_RDFA = 1
+HTML5_RDFA = 2
+
+class Options(object):
+ """Settable options. An instance of this class is stored in
+ the L{execution context<ExecutionContext>} of the parser.
+
+ @ivar space_preserve: whether plain literals should preserve spaces at output or not
+ @type space_preserve: Boolean
+ @ivar comment_graph: Graph for the storage of warnings
+ @type comment_graph: L{CommentGraph}
+ @ivar warnings: whether warnings should be generated or not
+ @type warnings: Boolean
+ @ivar transformers: extra transformers
+ @type transformers: list
+    @ivar host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA
+    @type host_language: integer (logically: an enumeration)
+ @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
+ @type lax: Boolean
+ """
+ def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False):
+ """
+ @param space_preserve: whether plain literals should preserve spaces at output or not
+ @type space_preserve: Boolean
+ @param warnings: whether warnings should be generated or not
+ @type warnings: Boolean
+ @param transformers: extra transformers
+ @type transformers: list
+        @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set to HTML5_RDFA, depending on the value of the lax flag and the result of parsing.
+        @type xhtml: Boolean
+ @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
+ @type lax: Boolean
+ """
+ self.space_preserve = space_preserve
+ self.transformers = transformers
+ self.comment_graph = CommentGraph(warnings)
+ self.warnings = warnings
+ self.lax = lax
+ if xhtml:
+ self.host_language = XHTML_RDFA
+ else:
+ self.host_language = GENERIC_XML
+
+ def __str__(self):
+ retval = """Current options:
+ space_preserve : %s
+ warnings : %s
+ lax parsing : %s
+ host language : %s
+ """
+ return retval % (self.space_preserve, self.warnings, self.lax, self.host_language)
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~
new file mode 100644
index 0000000..0329969
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+"""
+
+Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and
+handling of the extra Graph for warnings, informations, errors.
+
+
+@summary: RDFa parser (distiller)
+@requires: U{RDFLib<http://rdflib.net>}
+@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing; note possible dependecies on Python's version on the project's web site
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+
+"""
+
+import sys
+from rdflib.graph import Graph
+from rdflib.term import BNode, Literal, URIRef
+from rdflib.namespace import Namespace
+
+__all__ = ['CommentGraph', 'Options']
+
+DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller"
+DIST_NS = DIST_URI + '#'
+
+ns_errors = Namespace(DIST_NS)
+distillerURI = URIRef(DIST_URI)
+
+WARNING = 'warning'
+ERROR = 'error'
+INFO = 'info'
+DEBUG = 'debug'
+
+_message_properties = {
+ WARNING: ns_errors["warning"],
+ ERROR: ns_errors["error"],
+ INFO: ns_errors["information"],
+ DEBUG: ns_errors["debug"]
+}
+
+def _add_to_comment_graph(graph, msg, prop, uri):
+ """
+ Add a distiller message to the graph.
+
+ @param graph: RDFLib Graph
+ @param msg: message of an exception
+ @type msg: RDFLIb Literal
+ @param prop: the property to be used
+ @type prop: string, must be one of 'warning', 'error', 'info', 'debug'
+ @param uri: the top URI used to invoke the distiller
+ @type uri: URIRef
+ """
+ bnode = BNode()
+ graph.add((distillerURI, _message_properties[prop], bnode))
+ graph.add((bnode, ns_errors["onURI"], uri))
+ graph.add((bnode, ns_errors["message"], msg))
+
+
+class CommentGraph(object):
+ """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings,
+ error messages, and informational messages.
+ """
+ def __init__(self, warnings = False):
+ """
+ @param warnings: whether a graph should effectively be set up, or whether this
+ should just be an empty shell for the various calls to work (without effect)
+ """
+ if warnings:
+ self.graph = Graph()
+ else:
+ self.graph = None
+ self.accumulated_literals = []
+ self.baseURI = None
+
+ def _add_triple(self, msg, prop):
+ obj = Literal(msg)
+ if self.baseURI == None:
+ self.accumulated_literals.append((obj,prop))
+ elif self.graph != None:
+ _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
+
+ def set_base_URI(self, URI):
+ """Set the base URI for the comment triples.
+
+ Note that this method I{must} be called at some point to complete the triples. Without it the triples
+ added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph.
+
+ @param URI: URIRef for the subject of the comments
+ """
+ self.baseURI = URI
+ if self.graph != None:
+ for obj, prop in self.accumulated_literals:
+ _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
+ self.accumulated_literals = []
+
+ def add_warning(self, txt):
+ """Add a warning. A comment triplet is added to the separate "warning" graph.
+ @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== "
+ """
+ self._add_triple(txt, WARNING)
+
+ def add_info(self, txt):
+ """Add an informational comment. A comment triplet is added to the separate "warning" graph.
+ @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
+ """
+ self._add_triple(txt, INFO)
+
+ def add_error(self, txt):
+ """Add an error comment. A comment triplet is added to the separate "warning" graph.
+ @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
+ """
+ self._add_triple(txt, ERROR)
+
+ def _add_debug(self, txt):
+ self._add_triple(txt, DEBUG)
+
+
+GENERIC_XML = 0
+XHTML_RDFA = 1
+HTML5_RDFA = 2
+
+class Options(object):
+ """Settable options. An instance of this class is stored in
+ the L{execution context<ExecutionContext>} of the parser.
+
+ @ivar space_preserve: whether plain literals should preserve spaces at output or not
+ @type space_preserve: Boolean
+ @ivar comment_graph: Graph for the storage of warnings
+ @type comment_graph: L{CommentGraph}
+ @ivar warnings: whether warnings should be generated or not
+ @type warnings: Boolean
+ @ivar transformers: extra transformers
+ @type transformers: list
+ @type host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA
+ @ivar host_language: integer (logically: an enumeration)
+ @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
+ @type lax: Boolean
+ """
+ def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False):
+ """
+ @param space_preserve: whether plain literals should preserve spaces at output or not
+ @type space_preserve: Boolean
+ @param warnings: whether warnings should be generated or not
+ @type warnings: Boolean
+ @param transformers: extra transformers
+ @type transformers: list
+ @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set ot HTML5_RDFA, depending on the value of the lax flag and the result of parsing.
+ @type xhtml: Booelan
+ @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
+ @type lax: Boolean
+ """
+ self.space_preserve = space_preserve
+ self.transformers = transformers
+ self.comment_graph = CommentGraph(warnings)
+ self.warnings = warnings
+ self.lax = lax
+ if xhtml:
+ self.host_language = XHTML_RDFA
+ else:
+ self.host_language = GENERIC_XML
+
+ def __str__(self):
+ retval = """Current options:
+ space_preserve : %s
+ warnings : %s
+ lax parsing : %s
+ host language : %s
+ """
+ return retval % (self.space_preserve, self.warnings, self.lax, self.host_language)
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py
new file mode 100644
index 0000000..b786f7f
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+"""
+The core parsing function of RDFa. Some details are
+put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state).
+
+@summary: RDFa core parser processing step
+@requires: U{RDFLib package<http://rdflib_.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+"""
+
+from rdflib_.term import BNode, URIRef
+from rdflib_.namespace import RDF
+
+from rdflib_.plugins.parsers.rdfa.state import ExecutionContext
+from rdflib_.plugins.parsers.rdfa.literal import generate_literal
+from rdflib_.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF
+from rdflib_.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA
+
+__all__ = ['parse_one_node']
+
+def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
+ """The (recursive) step of handling a single node. See the
+ U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.
+
+ @param node: the DOM node to handle
+ @param graph: the RDF graph
+ @type graph: RDFLib's Graph object instance
+ @param parent_object: the parent's object, as an RDFLib URIRef
+ @param incoming_state: the inherited state (namespaces, lang, etc)
+ @type incoming_state: L{State.ExecutionContext}
+ @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
+ by the current node.
+ @return: whether the caller has to complete its parent's incomplete triples
+ @rtype: Boolean
+ """
+ def _get_resources_for_attr(attr):
+ """Get a series of resources encoded via CURIE-s for an attribute on a specific node.
+ @param attr: the name of the attribute
+ @return: a list of RDFLib URIRef instances
+ """
+ if not node.hasAttribute(attr):
+ return []
+ else:
+ rel = (attr == "rel") or (attr == "rev")
+ prop = (attr == "property")
+ return state.get_resources(node.getAttribute(attr), rel, prop)
+
+ # Update the state. This means, for example, the possible local settings of
+ # namespaces and lang
+ state = ExecutionContext(node, graph, inherited_state=incoming_state)
+
+ #---------------------------------------------------------------------------------
+ # Handle the special case for embedded RDF, eg, in SVG1.2.
+ # This may add some triples to the target graph that does not originate from RDFa parsing
+ # If the function return TRUE, that means that an rdf:RDF has been found. No
+ # RDFa parsing should be done on that subtree, so we simply return...
+ if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
+ return
+
+ #---------------------------------------------------------------------------------
+ # First, let us check whether there is anything to do at all. Ie,
+ # whether there is any relevant RDFa specific attribute on the element
+ #
+ if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
+ # nop, there is nothing to do here, just go down the tree and return...
+ for n in node.childNodes:
+ if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
+ return
+
+
+ #-----------------------------------------------------------------
+ # The goal is to establish the subject and object for local processing
+ # The behaviour is slightly different depending on the presence or not
+ # of the @rel/@rev attributes
+ current_subject = None
+ current_object = None
+
+ if _has_one_of_attributes(node, "rel", "rev"):
+ # in this case there is the notion of 'left' and 'right' of @rel/@rev
+ # in establishing the new Subject and the objectResource
+
+ # set first the subject
+ if node.hasAttribute("about"):
+ current_subject = state.get_Curie_ref(node.getAttribute("about"))
+ elif node.hasAttribute("src"):
+ current_subject = state.get_URI_ref(node.getAttribute("src"))
+ elif node.hasAttribute("typeof"):
+ current_subject = BNode()
+
+ # get_URI_ref may return None in case of an illegal Curie, so
+ # we have to be careful here, not use only an 'else'
+ if current_subject == None:
+ current_subject = parent_object
+
+ # set the object resource
+ if node.hasAttribute("resource"):
+ current_object = state.get_Curie_ref(node.getAttribute("resource"))
+ elif node.hasAttribute("href"):
+ current_object = state.get_URI_ref(node.getAttribute("href"))
+ else:
+ # in this case all the various 'resource' setting attributes
+ # behave identically, except that their value might be different
+ # in terms of CURIE-s and they also have their own priority, of course
+ if node.hasAttribute("about"):
+ current_subject = state.get_Curie_ref(node.getAttribute("about"))
+ elif node.hasAttribute("src"):
+ current_subject = state.get_URI_ref(node.getAttribute("src"))
+ elif node.hasAttribute("resource"):
+ current_subject = state.get_Curie_ref(node.getAttribute("resource"))
+ elif node.hasAttribute("href"):
+ current_subject = state.get_URI_ref(node.getAttribute("href"))
+ elif node.hasAttribute("typeof"):
+ current_subject = BNode()
+
+ # get_URI_ref may return None in case of an illegal Curie, so
+ # we have to be careful here, not use only an 'else'
+ if current_subject == None:
+ current_subject = parent_object
+
+ # in this case no non-literal triples will be generated, so the
+ # only role of the current_objectResource is to be transferred to
+ # the children node
+ current_object = current_subject
+
+ # ---------------------------------------------------------------------
+ # The possible typeof indicates a number of type statements on the newSubject
+ for defined_type in _get_resources_for_attr("typeof"):
+ graph.add((current_subject, RDF.type, defined_type))
+
+ # ---------------------------------------------------------------------
+ # In case of @rel/@rev, either triples or incomplete triples are generated
+ # the (possible) incomplete triples are collected, to be forwarded to the children
+ incomplete_triples = []
+ for prop in _get_resources_for_attr("rel"):
+ theTriple = (current_subject, prop, current_object)
+ if current_object != None:
+ graph.add(theTriple)
+ else:
+ incomplete_triples.append(theTriple)
+ for prop in _get_resources_for_attr("rev"):
+ theTriple = (current_object, prop, current_subject)
+ if current_object != None:
+ graph.add(theTriple)
+ else:
+ incomplete_triples.append(theTriple)
+
+ # ----------------------------------------------------------------------
+ # Generation of the literal values. The newSubject is the subject
+ # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
+ # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later
+ # in the parsing process).
+ if node.hasAttribute("property"):
+ # Generate the literal. It has been put into a separate module to make it more manageable
+ # the overall return value should be set to true if any valid triple has been generated
+ recurse = generate_literal(node, graph, current_subject, state)
+ else:
+ recurse = True
+
+ # ----------------------------------------------------------------------
+ # Setting the current object to a bnode is setting up a possible resource
+ # for the incomplete triples downwards
+ if current_object == None:
+ object_to_children = BNode()
+ else:
+ object_to_children = current_object
+
+ #-----------------------------------------------------------------------
+ # Here is the recursion step for all the children
+ if recurse:
+ for n in node.childNodes:
+ if n.nodeType == node.ELEMENT_NODE:
+ parse_one_node(n, graph, object_to_children, state, incomplete_triples)
+
+ # ---------------------------------------------------------------------
+ # At this point, the parent's incomplete triples may be completed
+ for s, p, o in parent_incomplete_triples:
+ if s == None: s = current_subject
+ if o == None: o = current_subject
+ graph.add((s, p, o))
+
+ # -------------------------------------------------------------------
+ # This should be it...
+ # -------------------------------------------------------------------
+ return
+
+
+def _has_one_of_attributes(node, *args):
+ """
+ Check whether one of the listed attributes is present on a (DOM) node.
+ @param node: DOM element node
+ @param args: possible attribute names
+ @return: True or False
+ @rtype: Boolean
+ """
+ return True in [ node.hasAttribute(attr) for attr in args ]
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~
new file mode 100644
index 0000000..d5b411f
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+"""
+The core parsing function of RDFa. Some details are
+put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state).
+
+@summary: RDFa core parser processing step
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+"""
+
+from rdflib.term import BNode, URIRef
+from rdflib.namespace import RDF
+
+from rdflib.plugins.parsers.rdfa.state import ExecutionContext
+from rdflib.plugins.parsers.rdfa.literal import generate_literal
+from rdflib.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF
+from rdflib.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA
+
+__all__ = ['parse_one_node']
+
+def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
+ """The (recursive) step of handling a single node. See the
+ U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.
+
+ @param node: the DOM node to handle
+ @param graph: the RDF graph
+ @type graph: RDFLib's Graph object instance
+ @param parent_object: the parent's object, as an RDFLib URIRef
+ @param incoming_state: the inherited state (namespaces, lang, etc)
+ @type incoming_state: L{State.ExecutionContext}
+ @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
+ by the current node.
+ @return: whether the caller has to complete its parent's incomplete triples
+ @rtype: Boolean
+ """
+ def _get_resources_for_attr(attr):
+ """Get a series of resources encoded via CURIE-s for an attribute on a specific node.
+ @param attr: the name of the attribute
+ @return: a list of RDFLib URIRef instances
+ """
+ if not node.hasAttribute(attr):
+ return []
+ else:
+ rel = (attr == "rel") or (attr == "rev")
+ prop = (attr == "property")
+ return state.get_resources(node.getAttribute(attr), rel, prop)
+
+ # Update the state. This means, for example, the possible local settings of
+ # namespaces and lang
+ state = ExecutionContext(node, graph, inherited_state=incoming_state)
+
+ #---------------------------------------------------------------------------------
+ # Handle the special case for embedded RDF, eg, in SVG1.2.
+ # This may add some triples to the target graph that does not originate from RDFa parsing
+ # If the function return TRUE, that means that an rdf:RDF has been found. No
+ # RDFa parsing should be done on that subtree, so we simply return...
+ if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
+ return
+
+ #---------------------------------------------------------------------------------
+ # First, let us check whether there is anything to do at all. Ie,
+ # whether there is any relevant RDFa specific attribute on the element
+ #
+ if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
+ # nop, there is nothing to do here, just go down the tree and return...
+ for n in node.childNodes:
+ if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
+ return
+
+
+ #-----------------------------------------------------------------
+ # The goal is to establish the subject and object for local processing
+ # The behaviour is slightly different depending on the presence or not
+ # of the @rel/@rev attributes
+ current_subject = None
+ current_object = None
+
+ if _has_one_of_attributes(node, "rel", "rev"):
+ # in this case there is the notion of 'left' and 'right' of @rel/@rev
+ # in establishing the new Subject and the objectResource
+
+ # set first the subject
+ if node.hasAttribute("about"):
+ current_subject = state.get_Curie_ref(node.getAttribute("about"))
+ elif node.hasAttribute("src"):
+ current_subject = state.get_URI_ref(node.getAttribute("src"))
+ elif node.hasAttribute("typeof"):
+ current_subject = BNode()
+
+ # get_URI_ref may return None in case of an illegal Curie, so
+ # we have to be careful here, not use only an 'else'
+ if current_subject == None:
+ current_subject = parent_object
+
+ # set the object resource
+ if node.hasAttribute("resource"):
+ current_object = state.get_Curie_ref(node.getAttribute("resource"))
+ elif node.hasAttribute("href"):
+ current_object = state.get_URI_ref(node.getAttribute("href"))
+ else:
+ # in this case all the various 'resource' setting attributes
+ # behave identically, except that their value might be different
+ # in terms of CURIE-s and they also have their own priority, of course
+ if node.hasAttribute("about"):
+ current_subject = state.get_Curie_ref(node.getAttribute("about"))
+ elif node.hasAttribute("src"):
+ current_subject = state.get_URI_ref(node.getAttribute("src"))
+ elif node.hasAttribute("resource"):
+ current_subject = state.get_Curie_ref(node.getAttribute("resource"))
+ elif node.hasAttribute("href"):
+ current_subject = state.get_URI_ref(node.getAttribute("href"))
+ elif node.hasAttribute("typeof"):
+ current_subject = BNode()
+
+ # get_URI_ref may return None in case of an illegal Curie, so
+ # we have to be careful here, not use only an 'else'
+ if current_subject == None:
+ current_subject = parent_object
+
+ # in this case no non-literal triples will be generated, so the
+ # only role of the current_objectResource is to be transferred to
+ # the children node
+ current_object = current_subject
+
+ # ---------------------------------------------------------------------
+ # The possible typeof indicates a number of type statements on the newSubject
+ for defined_type in _get_resources_for_attr("typeof"):
+ graph.add((current_subject, RDF.type, defined_type))
+
+ # ---------------------------------------------------------------------
+ # In case of @rel/@rev, either triples or incomplete triples are generated
+ # the (possible) incomplete triples are collected, to be forwarded to the children
+ incomplete_triples = []
+ for prop in _get_resources_for_attr("rel"):
+ theTriple = (current_subject, prop, current_object)
+ if current_object != None:
+ graph.add(theTriple)
+ else:
+ incomplete_triples.append(theTriple)
+ for prop in _get_resources_for_attr("rev"):
+ theTriple = (current_object, prop, current_subject)
+ if current_object != None:
+ graph.add(theTriple)
+ else:
+ incomplete_triples.append(theTriple)
+
+ # ----------------------------------------------------------------------
+ # Generation of the literal values. The newSubject is the subject
+ # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
+ # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later
+ # in the parsing process).
+ if node.hasAttribute("property"):
+ # Generate the literal. It has been put into a separate module to make it more manageable
+ # the overall return value should be set to true if any valid triple has been generated
+ recurse = generate_literal(node, graph, current_subject, state)
+ else:
+ recurse = True
+
+ # ----------------------------------------------------------------------
+ # Setting the current object to a bnode is setting up a possible resource
+ # for the incomplete triples downwards
+ if current_object == None:
+ object_to_children = BNode()
+ else:
+ object_to_children = current_object
+
+ #-----------------------------------------------------------------------
+ # Here is the recursion step for all the children
+ if recurse:
+ for n in node.childNodes:
+ if n.nodeType == node.ELEMENT_NODE:
+ parse_one_node(n, graph, object_to_children, state, incomplete_triples)
+
+ # ---------------------------------------------------------------------
+ # At this point, the parent's incomplete triples may be completed
+ for s, p, o in parent_incomplete_triples:
+ if s == None: s = current_subject
+ if o == None: o = current_subject
+ graph.add((s, p, o))
+
+ # -------------------------------------------------------------------
+ # This should be it...
+ # -------------------------------------------------------------------
+ return
+
+
+def _has_one_of_attributes(node, *args):
+ """
+ Check whether one of the listed attributes is present on a (DOM) node.
+ @param node: DOM element node
+ @param args: possible attribute names
+ @return: True or False
+ @rtype: Boolean
+ """
+ return True in [ node.hasAttribute(attr) for attr in args ]
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py
new file mode 100644
index 0000000..0e6be08
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py
@@ -0,0 +1,434 @@
+# -*- coding: utf-8 -*-
+"""
+Parser's execution context (a.k.a. state) object and handling. The state includes:
+
+ - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances
+ - language, retrieved from C{@xml:lang}
+ - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed.
+ - options, in the form of an L{Options<pyRdfa.Options>} instance
+
+The execution context object is also used to turn relative URI-s and CURIES into real URI references.
+
+@summary: RDFa core parser processing step
+@requires: U{RDFLib package<http://rdflib_.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+
+@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace
+@var XHTML_URI: URI prefix of the XHTML vocabulary
+@var RDFa_PROFILE: the official RDFa profile URI
+@var RDFa_VERSION: the official version string of RDFa
+@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected)
+@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s.
+@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.)
+@var __bnodes: dictionary of blank node names to real blank node
+@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}".
+"""
+
+from rdflib_.namespace import Namespace, RDF, RDFS
+from rdflib_.term import BNode, URIRef
+from rdflib_.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA
+
+import re
+import random
+import urlparse
+
+__all__ = ['ExecutionContext']
+
+RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab"
+RDFa_VERSION = "XHTML+RDFa 1.0"
+RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN"
+RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"
+
+usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"]
+
+####Predefined @rel/@rev/@property values
+# predefined values for the @rel and @rev values. These are considered to be part of a specific
+# namespace, defined by the RDFa document.
+# At the moment, there are no predefined @property values, but the code is there in case
+# some will be defined
+XHTML_PREFIX = "xhv"
+XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#"
+
+_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents',
+'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev',
+'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top']
+
+_predefined_property = []
+
+#### Managing blank nodes for CURIE-s
+__bnodes = {}
+__empty_bnode = BNode()
+def _get_bnode_from_Curie(var):
+ """
+ 'Var' gives the string after the colon in a CURIE of the form C{_:XXX}. If this variable has been used
+ before, then the corresponding BNode is returned; otherwise a new BNode is created and
+ associated to that value.
+ @param var: CURIE BNode identifier
+ @return: BNode
+ """
+ if len(var) == 0:
+ return __empty_bnode
+ if var in __bnodes:
+ return __bnodes[var]
+ else:
+ retval = BNode()
+ __bnodes[var] = retval
+ return retval
+
+#### Quote URI-s
+import urllib
+# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other
+# special characters are converted to their %.. equivalents for namespace prefixes
+_unquotedChars = ':/\?=#'
+_warnChars = [' ', '\n', '\r', '\t']
+def _quote(uri, options):
+ """
+ 'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
+ may stay as they are (listed in L{_unquotedChars}. If one of the characters listed in L{_warnChars}
+ is also in the uri, an extra warning is also generated.
+ @param uri: URI
+ @param options:
+ @type options: L{Options<pyRdfa.Options>}
+ """
+ suri = uri.strip()
+ for c in _warnChars:
+ if suri.find(c) != -1:
+ if options != None:
+ options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri)
+ break
+ return urllib.quote(suri, _unquotedChars)
+
+
+#### Core Class definition
+class ExecutionContext(object):
+ """State at a specific node, including the current set
+ of namespaces in the RDFLib sense, the
+ current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce
+ URI references for RDFLib.
+
+ @ivar options: reference to the overall options
+ @type ivar: L{Options.Options}
+ @ivar base: the 'base' URI
+ @ivar defaultNS: default namespace
+ @ivar lang: language tag (possibly None)
+ @ivar ns: dictionary of namespaces
+ @type ns: dictionary, each value is an RDFLib Namespace object
+
+ """
+ def __init__(self, node, graph, inherited_state=None, base="", options=None):
+ """
+ @param node: the current DOM Node
+ @param graph: the RDFLib Graph
+ @keyword inherited_state: the state as inherited
+ from upper layers. This inherited_state is mixed with the state information
+ retrieved from the current node.
+ @type inherited_state: L{State.ExecutionContext}
+ @keyword base: string denoting the base URI for the specific node. This overrides the possible
+ base inherited from the upper layers. The
+ current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is
+ necessary for SVG (and other possible XML dialects that accept C{@xml:base})
+ @keyword options: invocation option
+ @type options: L{Options<pyRdfa.Options>}
+ """
+ #-----------------------------------------------------------------
+ # settling the base
+ # note that, strictly speaking, it is not necessary to add the base to the
+ # context, because there is only one place to set it (<base> element of the <header>).
+ # It is done because it is prepared for a possible future change in direction of
+ # accepting xml:base on each element.
+ # At the moment, it is invoked with a 'None' at the top level of parsing, that is
+ # when the <base> element is looked for.
+ if inherited_state:
+ self.base = inherited_state.base
+ self.options = inherited_state.options
+ # for generic XML versions the xml:base attribute should be handled
+ if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
+ self.base = node.getAttribute("xml:base")
+ else:
+ # this is the branch called from the very top
+ self.base = ""
+ for bases in node.getElementsByTagName("base"):
+ if bases.hasAttribute("href"):
+ self.base = bases.getAttribute("href")
+ continue
+ if self.base == "":
+ self.base = base
+
+ # this is just to play safe. I believe this branch should actually not happen...
+ if options == None:
+ from pyRdfa import Options
+ self.options = Options()
+ else:
+ self.options = options
+
+ # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2
+ if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
+ self.base = node.getAttribute("xml:base")
+
+ self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options)))
+
+ # check the presence of the @profile and/or @version attribute for the RDFa profile...
+ # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG)
+ if self.options.host_language != GENERIC_XML:
+ doctype = None
+ try:
+ # I am not 100% sure the HTML5 minidom implementation has this, so let us just be
+ # cautious here...
+ doctype = node.ownerDocument.doctype
+ except:
+ pass
+ if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ):
+ # next level: check the version
+ html = node.ownerDocument.documentElement
+ if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ):
+ # see if least the profile has been set
+ # Find the <head> element
+ head = None
+ for index in range(0, html.childNodes.length-1):
+ if html.childNodes.item(index).nodeName == "head":
+ head = html.childNodes.item(index)
+ break
+ if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ):
+ if self.options.host_language == HTML5_RDFA:
+ self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...")
+ else:
+ self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.")
+
+ #-----------------------------------------------------------------
+ # Stripping the fragment ID from the base URI, as demanded by RFC 3986
+ self.base = urlparse.urldefrag(self.base)[0]
+
+ #-----------------------------------------------------------------
+ # Settling the language tags
+ # check first the lang or xml:lang attribute
+ # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang;
+ # I just want to be prepared here...
+ if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"):
+ self.lang = node.getAttribute("lang")
+ if len(self.lang) == 0 : self.lang = None
+ elif node.hasAttribute("xml:lang"):
+ self.lang = node.getAttribute("xml:lang")
+ if len(self.lang) == 0 : self.lang = None
+ elif inherited_state:
+ self.lang = inherited_state.lang
+ else:
+ self.lang = None
+
+ #-----------------------------------------------------------------
+ # Handling namespaces
+ # First get the local xmlns declarations/namespaces stuff.
+ dict = {}
+ for i in range(0, node.attributes.length):
+ attr = node.attributes.item(i)
+ if attr.name.find('xmlns:') == 0 :
+ # yep, there is a namespace setting
+ key = attr.localName
+ if key != "" : # exclude the top level xmlns setting...
+ if key == "_":
+ if warning: self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" )
+ elif key.find(':') != -1:
+ if warning: self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" )
+ else :
+ # quote the URI, ie, convert special characters into %.. This is
+ # true, for example, for spaces
+ uri = _quote(attr.value, self.options)
+ # 1. create a new Namespace entry
+ ns = Namespace(uri)
+ # 2. 'bind' it in the current graph to
+ # get a nicer output
+ graph.bind(key, uri)
+ # 3. Add an entry to the dictionary
+ dict[key] = ns
+
+ # See if anything has been collected at all.
+ # If not, the namespaces of the incoming state is
+ # taken over
+ self.ns = {}
+ if len(dict) == 0 and inherited_state:
+ self.ns = inherited_state.ns
+ else:
+ if inherited_state:
+ for k in inherited_state.ns : self.ns[k] = inherited_state.ns[k]
+ # copying the newly found namespace, possibly overwriting
+ # incoming values
+ for k in dict : self.ns[k] = dict[k]
+ else:
+ self.ns = dict
+
+ # see if the xhtml core vocabulary has been set
+ self.xhtml_prefix = None
+ for key in self.ns.keys():
+ if XHTML_URI == str(self.ns[key]):
+ self.xhtml_prefix = key
+ break
+ if self.xhtml_prefix == None:
+ if XHTML_PREFIX not in self.ns:
+ self.ns[XHTML_PREFIX] = Namespace(XHTML_URI)
+ self.xhtml_prefix = XHTML_PREFIX
+ else:
+ # the most disagreeable thing, the user has used
+ # the prefix for something else...
+ self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000))
+ self.ns[self.xhtml_prefix] = Namespace(XHTML_URI)
+ graph.bind(self.xhtml_prefix, XHTML_URI)
+
+ # extra tricks for unusual usages...
+ # if the 'rdf' prefix is not used, it is artificially added...
+ if "rdf" not in self.ns:
+ self.ns["rdf"] = RDF
+ if "rdfs" not in self.ns:
+ self.ns["rdfs"] = RDFS
+
+ # Final touch: setting the default namespace...
+ if node.hasAttribute("xmlns"):
+ self.defaultNS = node.getAttribute("xmlns")
+ elif inherited_state and inherited_state.defaultNS != None:
+ self.defaultNS = inherited_state.defaultNS
+ else:
+ self.defaultNS = None
+
+ def _get_predefined_rels(self, val, warning):
+ """Get the predefined URI value for the C{@rel/@rev} attribute.
+ @param val: attribute name
+ @param warning: whether a warning should be generated or not
+ @type warning: boolean
+ @return: URIRef for the predefined URI (or None)
+ """
+ vv = val.strip().lower()
+ if vv in _predefined_rel:
+ return self.ns[self.xhtml_prefix][vv]
+ else:
+ if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val)
+ return None
+
+ def _get_predefined_properties(self, val, warning):
+ """Get the predefined value for the C{@property} attribute.
+ @param val: attribute name
+ @param warning: whether a warning should be generated or not
+ @type warning: boolean
+ @return: URIRef for the predefined URI (or None)
+ """
+ vv = val.strip().lower()
+ if vv in _predefined_property:
+ return self.ns[self.xhtml_prefix][vv]
+ else:
+ if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val)
+ return None
+
+ def get_resource(self, val, rel=False, prop=False, warning=True):
+ """Get a resource for a CURIE.
+ The input argument is a CURIE; this is interpreted
+ via the current namespaces and the corresponding URI Reference is returned
+ @param val: string of the form "prefix:lname"
+ @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
+ @keyword prop: whether the predefined C{@property} values should also be interpreted
+ @return: an RDFLib URIRef instance (or None)
+ """
+ if val == "":
+ return None
+ elif val.find(":") != -1:
+ key = val.split(":", 1)[0]
+ lname = val.split(":", 1)[1]
+ if key == "_":
+ # A possible error: this method is invoked for property URI-s, which
+ # should not refer to a blank node. This case is checked and a possible
+ # error condition is handled
+ self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname)
+ return None
+ if key == "":
+ # This is the ":blabla" case
+ key = self.xhtml_prefix
+ else:
+ # if the resources correspond to a @rel or @rev or @property, then there
+ # may be one more possibility here, namely that it is one of the
+ # predefined values
+ if rel:
+ return self._get_predefined_rels(val, warning)
+ elif prop:
+ return self._get_predefined_properties(val, warning)
+ else:
+ self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val)
+ return None
+
+ if key not in self.ns:
+ self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key)
+ return None
+ else:
+ if lname == "":
+ return URIRef(str(self.ns[key]))
+ else:
+ return self.ns[key][lname]
+
+ def get_resources(self, val, rel=False, prop=False):
+ """Get a series of resources encoded in CURIE-s.
+ The input argument is a list of CURIE-s; these are interpreted
+ via the current namespaces and the corresponding URI References are returned.
+ @param val: strings of the form prefix':'lname, separated by space
+ @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
+ @keyword prop: whether the predefined C{@property} values should also be interpreted
+ @return: a list of RDFLib URIRef instances (possibly empty)
+ """
+ val.strip()
+ resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ]
+ return [ r for r in resources if r != None ]
+
+ def get_URI_ref(self, val):
+ """Create a URI RDFLib resource for a URI.
+ The input argument is a URI. It is checked whether it is a local
+ reference with a '#' or not. If yes, a URIRef combined with the
+ stored base value is returned. In both cases a URIRef for a full URI is created
+ and returned
+ @param val: URI string
+ @return: an RDFLib URIRef instance
+ """
+ if val == "":
+ return URIRef(self.base)
+ elif val[0] == '[' and val[-1] == ']':
+ self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val)
+ return None
+ else:
+ return URIRef(urlparse.urljoin(self.base, val))
+
+ def get_Curie_ref(self, val):
+ """Create a URI RDFLib resource for a CURIE.
+ The input argument is a CURIE. This means that it is:
+ - either of the form [a:b] where a:b should be resolved as an
+ 'unprotected' CURIE, or
+ - it is a traditional URI (relative or absolute)
+
+ If the second case the URI value is also compared to 'usual' URI
+ protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}).
+ If there is no match, a warning is generated (indeed, a frequent
+ mistake in authoring RDFa is to forget the '[' and ']' characters to
+ "protect" CURIE-s.)
+
+ @param val: CURIE string
+ @return: an RDFLib URIRef instance
+ """
+ if len(val) == 0:
+ return URIRef(self.base)
+ elif val[0] == "[":
+ if val[-1] == "]":
+ curie = val[1:-1]
+ # A possible Blank node reference should be separated here:
+ if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":":
+ return _get_bnode_from_Curie(curie[2:])
+ else:
+ return self.get_resource(val[1:-1])
+ else:
+ # illegal CURIE...
+ self.options.comment_graph.add_error("Illegal CURIE: %s" % val)
+ return None
+ else:
+ # check the value, to see if an error may have been made...
+ # Usual protocol values in the URI
+ v = val.strip().lower()
+ protocol = urlparse.urlparse(val)[0]
+ if protocol != "" and protocol not in usual_protocols:
+ err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val
+ self.options.comment_graph.add_warning(err)
+ return self.get_URI_ref(val)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py~
new file mode 100644
index 0000000..31caf41
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py~
@@ -0,0 +1,434 @@
+# -*- coding: utf-8 -*-
+"""
+Parser's execution context (a.k.a. state) object and handling. The state includes:
+
+ - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances
+ - language, retrieved from C{@xml:lang}
+ - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed.
+ - options, in the form of an L{Options<pyRdfa.Options>} instance
+
+The execution context object is also used to turn relative URI-s and CURIES into real URI references.
+
+@summary: RDFa core parser processing step
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+
+@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace
+@var XHTML_URI: URI prefix of the XHTML vocabulary
+@var RDFa_PROFILE: the official RDFa profile URI
+@var RDFa_VERSION: the official version string of RDFa
+@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected)
+@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s.
+@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.)
+@var __bnodes: dictionary of blank node names to real blank node
+@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}".
+"""
+
+from rdflib.namespace import Namespace, RDF, RDFS
+from rdflib.term import BNode, URIRef
+from rdflib.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA
+
+import re
+import random
+import urlparse
+
+__all__ = ['ExecutionContext']
+
# Official identifiers of the XHTML+RDFa 1.0 profile/DTD; used to recognize
# RDFa documents through their DOCTYPE, @version or @profile settings.
RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab"
RDFa_VERSION = "XHTML+RDFa 1.0"
RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN"
RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"

# "Usual" URI schemes; an unprotected value whose scheme is not listed here
# triggers a warning (the author probably forgot the protecting [...]).
usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"]

#### Predefined @rel/@rev/@property values
# predefined values for the @rel and @rev values. These are considered to be part of a specific
# namespace, defined by the RDFa document.
# At the moment, there are no predefined @property values, but the code is there in case
# some will be defined
XHTML_PREFIX = "xhv"
XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#"

_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents',
'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev',
'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top']

# Empty at present; kept so the @property checking mechanism stays in place.
_predefined_property = []
+
#### Managing blank nodes for CURIE-s
# Cache mapping CURIE blank node identifiers onto real BNode instances, so
# that the same "_:xxx" always resolves to the same node.
__bnodes = {}
# The single BNode associated with the bare "_:" CURIE.
__empty_bnode = BNode()

def _get_bnode_from_Curie(var):
    """Return the BNode associated with a C{_:XXX} CURIE identifier.

    'var' is the string after the column in a CURIE of the form C{_:XXX}.
    The same identifier always maps onto the same blank node: a fresh BNode
    is created (and cached) the first time an identifier is seen, and the
    empty identifier maps onto one shared BNode.
    @param var: CURIE BNode identifier
    @return: BNode
    """
    if not var:
        return __empty_bnode
    try:
        return __bnodes[var]
    except KeyError:
        node = BNode()
        __bnodes[var] = node
        return node
+
#### Quote URI-s
import urllib
# 'safe' characters for the URI quoting, ie, characters that can safely stay
# as they are; all other special characters are converted to their %.. form.
_unquotedChars = ':/\?=#'
# Characters whose presence in a URI is suspicious enough to warrant a warning.
_warnChars = [' ', '\n', '\r', '\t']

def _quote(uri, options):
    """Percent-encode the special characters of a URI.

    Characters listed in L{_unquotedChars} stay as they are. If one of the
    characters listed in L{_warnChars} occurs in the URI, a single extra
    warning is also generated.
    @param uri: URI
    @param options:
    @type options: L{Options<pyRdfa.Options>}
    """
    stripped = uri.strip()
    for suspicious in _warnChars:
        if suspicious in stripped:
            if options != None:
                options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % stripped)
            break
    return urllib.quote(stripped, _unquotedChars)
+
+
+#### Core Class definition
class ExecutionContext(object):
    """State at a specific node, including the current set
    of namespaces in the RDFLib sense, the
    current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce
    URI references for RDFLib.

    @ivar options: reference to the overall options
    @type ivar: L{Options.Options}
    @ivar base: the 'base' URI
    @ivar defaultNS: default namespace
    @ivar lang: language tag (possibly None)
    @ivar ns: dictionary of namespaces
    @type ns: dictionary, each value is an RDFLib Namespace object
    """
    def __init__(self, node, graph, inherited_state=None, base="", options=None):
        """
        @param node: the current DOM Node
        @param graph: the RDFLib Graph
        @keyword inherited_state: the state as inherited
        from upper layers. This inherited_state is mixed with the state information
        retrieved from the current node.
        @type inherited_state: L{State.ExecutionContext}
        @keyword base: string denoting the base URI for the specific node. This overrides the possible
        base inherited from the upper layers. The
        current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is
        necessary for SVG (and other possible XML dialects that accept C{@xml:base})
        @keyword options: invocation option
        @type options: L{Options<pyRdfa.Options>}
        """
        #-----------------------------------------------------------------
        # settling the base
        # note that, strictly speaking, it is not necessary to add the base to the
        # context, because there is only one place to set it (<base> element of the <header>).
        # It is done because it is prepared for a possible future change in direction of
        # accepting xml:base on each element.
        # At the moment, it is invoked with a 'None' at the top level of parsing, that is
        # when the <base> element is looked for.
        if inherited_state:
            self.base = inherited_state.base
            self.options = inherited_state.options
            # for generic XML versions the xml:base attribute should be handled
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")
        else:
            # this is the branch called from the very top
            self.base = ""
            for bases in node.getElementsByTagName("base"):
                if bases.hasAttribute("href"):
                    self.base = bases.getAttribute("href")
                    continue
            if self.base == "":
                self.base = base

            # this is just to play safe. I believe this branch should actually not happen...
            if options == None:
                from pyRdfa import Options
                self.options = Options()
            else:
                self.options = options

            # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")

            self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options)))

            # check the presence of the @profile and/or @version attribute for the RDFa profile...
            # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG)
            if self.options.host_language != GENERIC_XML:
                doctype = None
                try:
                    # I am not 100% sure the HTML5 minidom implementation has this, so let us just be
                    # cautious here... (narrowed from a bare 'except:' clause)
                    doctype = node.ownerDocument.doctype
                except Exception:
                    pass
                if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ):
                    # next level: check the version
                    html = node.ownerDocument.documentElement
                    if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ):
                        # see if at least the profile has been set
                        # Find the <head> element
                        head = None
                        # BUGFIX: the original loop ran to childNodes.length-1 and thereby
                        # skipped the last child; range() is already exclusive at the top end
                        for index in range(0, html.childNodes.length):
                            if html.childNodes.item(index).nodeName == "head":
                                head = html.childNodes.item(index)
                                break
                        if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ):
                            # BUGFIX: 'RFDa' typos corrected to 'RDFa' in the two messages below
                            if self.options.host_language == HTML5_RDFA:
                                self.options.comment_graph.add_info("RDFa profile or RDFa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...")
                            else:
                                self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RDFa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.")

        #-----------------------------------------------------------------
        # Stripping the fragment ID from the base URI, as demanded by RFC 3986
        self.base = urlparse.urldefrag(self.base)[0]

        #-----------------------------------------------------------------
        # Settling the language tags
        # check first the lang or xml:lang attribute
        # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang;
        # I just want to be prepared here...
        if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"):
            self.lang = node.getAttribute("lang")
            if len(self.lang) == 0: self.lang = None
        elif node.hasAttribute("xml:lang"):
            self.lang = node.getAttribute("xml:lang")
            if len(self.lang) == 0: self.lang = None
        elif inherited_state:
            self.lang = inherited_state.lang
        else:
            self.lang = None

        #-----------------------------------------------------------------
        # Handling namespaces
        # First get the local xmlns declarations/namespaces stuff.
        # (renamed from 'dict', which shadowed the builtin)
        local_ns = {}
        for i in range(0, node.attributes.length):
            attr = node.attributes.item(i)
            if attr.name.find('xmlns:') == 0:
                # yep, there is a namespace setting
                key = attr.localName
                if key != "": # exclude the top level xmlns setting...
                    # BUGFIX: the two error reports below were guarded by an undefined
                    # name 'warning' (NameError at runtime); they are now unconditional
                    if key == "_":
                        self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" )
                    elif key.find(':') != -1:
                        self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" )
                    else:
                        # quote the URI, ie, convert special characters into %.. This is
                        # true, for example, for spaces
                        uri = _quote(attr.value, self.options)
                        # 1. create a new Namespace entry
                        ns = Namespace(uri)
                        # 2. 'bind' it in the current graph to
                        # get a nicer output
                        graph.bind(key, uri)
                        # 3. Add an entry to the dictionary
                        local_ns[key] = ns

        # See if anything has been collected at all.
        # If not, the namespaces of the incoming state is
        # taken over
        self.ns = {}
        if len(local_ns) == 0 and inherited_state:
            self.ns = inherited_state.ns
        else:
            if inherited_state:
                for k in inherited_state.ns: self.ns[k] = inherited_state.ns[k]
                # copying the newly found namespace, possibly overwriting
                # incoming values
                for k in local_ns: self.ns[k] = local_ns[k]
            else:
                self.ns = local_ns

        # see if the xhtml core vocabulary has been set
        self.xhtml_prefix = None
        for key in self.ns.keys():
            if XHTML_URI == str(self.ns[key]):
                self.xhtml_prefix = key
                break
        if self.xhtml_prefix == None:
            if XHTML_PREFIX not in self.ns:
                self.ns[XHTML_PREFIX] = Namespace(XHTML_URI)
                self.xhtml_prefix = XHTML_PREFIX
            else:
                # the most disagreeable thing, the user has used
                # the prefix for something else...
                self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000))
                self.ns[self.xhtml_prefix] = Namespace(XHTML_URI)
                # NOTE(review): the source appears to bind only this randomized
                # prefix to the graph; possibly the plain 'xhv' branch above
                # should bind too -- confirm against upstream pyRdfa
                graph.bind(self.xhtml_prefix, XHTML_URI)

        # extra tricks for unusual usages...
        # if the 'rdf' prefix is not used, it is artificially added...
        if "rdf" not in self.ns:
            self.ns["rdf"] = RDF
        if "rdfs" not in self.ns:
            self.ns["rdfs"] = RDFS

        # Final touch: setting the default namespace...
        if node.hasAttribute("xmlns"):
            self.defaultNS = node.getAttribute("xmlns")
        elif inherited_state and inherited_state.defaultNS != None:
            self.defaultNS = inherited_state.defaultNS
        else:
            self.defaultNS = None

    def _get_predefined_rels(self, val, warning):
        """Get the predefined URI value for the C{@rel/@rev} attribute.
        @param val: attribute value
        @param warning: whether a warning should be generated or not
        @type warning: boolean
        @return: URIRef for the predefined URI (or None)
        """
        vv = val.strip().lower()
        if vv in _predefined_rel:
            return self.ns[self.xhtml_prefix][vv]
        else:
            if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val)
            return None

    def _get_predefined_properties(self, val, warning):
        """Get the predefined value for the C{@property} attribute.
        @param val: attribute value
        @param warning: whether a warning should be generated or not
        @type warning: boolean
        @return: URIRef for the predefined URI (or None)
        """
        vv = val.strip().lower()
        if vv in _predefined_property:
            return self.ns[self.xhtml_prefix][vv]
        else:
            if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val)
            return None

    def get_resource(self, val, rel=False, prop=False, warning=True):
        """Get a resource for a CURIE.
        The input argument is a CURIE; this is interpreted
        via the current namespaces and the corresponding URI Reference is returned
        @param val: string of the form "prefix:lname"
        @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
        @keyword prop: whether the predefined C{@property} values should also be interpreted
        @return: an RDFLib URIRef instance (or None)
        """
        if val == "":
            return None
        elif val.find(":") != -1:
            key = val.split(":", 1)[0]
            lname = val.split(":", 1)[1]
            if key == "_":
                # A possible error: this method is invoked for property URI-s, which
                # should not refer to a blank node. This case is checked and a possible
                # error condition is handled
                self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname)
                return None
            if key == "":
                # This is the ":blabla" case
                key = self.xhtml_prefix
        else:
            # if the resources correspond to a @rel or @rev or @property, then there
            # may be one more possibility here, namely that it is one of the
            # predefined values
            if rel:
                return self._get_predefined_rels(val, warning)
            elif prop:
                return self._get_predefined_properties(val, warning)
            else:
                self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val)
                return None

        if key not in self.ns:
            self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key)
            return None
        else:
            if lname == "":
                return URIRef(str(self.ns[key]))
            else:
                return self.ns[key][lname]

    def get_resources(self, val, rel=False, prop=False):
        """Get a series of resources encoded in CURIE-s.
        The input argument is a whitespace separated list of CURIE-s; these are
        interpreted via the current namespaces and the corresponding URI
        References are returned, dropping unresolvable ones.
        (Removed a no-op C{val.strip()} whose result was discarded and a dead
        C{v != None} filter on C{str.split} output.)
        @param val: strings of the form prefix':'lname, separated by space
        @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
        @keyword prop: whether the predefined C{@property} values should also be interpreted
        @return: a list of RDFLib URIRef instances (possibly empty)
        """
        resources = [self.get_resource(v, rel, prop) for v in val.split()]
        return [r for r in resources if r is not None]

    def get_URI_ref(self, val):
        """Create a URI RDFLib resource for a URI.
        An empty string yields the stored base URI itself; a value wrapped in
        square brackets is an illegal (CURIE-like) usage and is reported as an
        error; anything else is joined against the base.
        @param val: URI string
        @return: an RDFLib URIRef instance (or None on error)
        """
        if val == "":
            return URIRef(self.base)
        elif val[0] == '[' and val[-1] == ']':
            self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val)
            return None
        else:
            return URIRef(urlparse.urljoin(self.base, val))

    def get_Curie_ref(self, val):
        """Create a URI RDFLib resource for a CURIE.
        The input argument is a CURIE. This means that it is:
          - either of the form [a:b] where a:b should be resolved as an
            'unprotected' CURIE, or
          - it is a traditional URI (relative or absolute)

        In the second case the URI scheme is also compared to 'usual' URI
        protocols ('http', 'https', 'ftp', etc, see L{usual_protocols}).
        If there is no match, a warning is generated (indeed, a frequent
        mistake in authoring RDFa is to forget the '[' and ']' characters to
        "protect" CURIE-s.)
        (Removed the dead assignment C{v = val.strip().lower()}.)

        @param val: CURIE string
        @return: an RDFLib URIRef instance
        """
        if len(val) == 0:
            return URIRef(self.base)
        elif val[0] == "[":
            if val[-1] == "]":
                curie = val[1:-1]
                # A possible Blank node reference should be separated here:
                if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":":
                    return _get_bnode_from_Curie(curie[2:])
                else:
                    return self.get_resource(curie)
            else:
                # illegal CURIE...
                self.options.comment_graph.add_error("Illegal CURIE: %s" % val)
                return None
        else:
            # check the value, to see if an error may have been made...
            # Usual protocol values in the URI
            protocol = urlparse.urlparse(val)[0]
            if protocol != "" and protocol not in usual_protocols:
                err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val
                self.options.comment_graph.add_warning(err)
            return self.get_URI_ref(val)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py
new file mode 100644
index 0000000..feff6ff
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Simple transformer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there).
+Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax.
+
+@summary: Add a top "about" to <head> and <body>
+@requires: U{RDFLib package<http://rdflib_.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+@contact: Ivan Herman, ivan@w3.org
+"""
+
def head_about_transform(html, options):
    """
    Add C{@about=""} to every C{<head>} and C{<body>} element that does not
    already carry an C{@about} attribute (mandated by the RDFa syntax).
    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.Options>}
    """
    for tag_name in ("head", "body"):
        for element in html.getElementsByTagName(tag_name):
            if not element.hasAttribute("about"):
                element.setAttribute("about", "")
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~
new file mode 100644
index 0000000..0cf8f7a
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Simple transformer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there).
+Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax.
+
+@summary: Add a top "about" to <head> and <body>
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+@contact: Ivan Herman, ivan@w3.org
+"""
+
def head_about_transform(html, options):
    """
    Ensure each C{<head>} and C{<body>} element carries an C{@about}
    attribute, adding an empty one where missing (mandated by RDFa).
    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.Options>}
    """
    for node in html.getElementsByTagName("head"):
        if node.hasAttribute("about"):
            continue
        node.setAttribute("about", "")
    for node in html.getElementsByTagName("body"):
        if node.hasAttribute("about"):
            continue
        node.setAttribute("about", "")
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py
new file mode 100644
index 0000000..a9d00ff
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py
@@ -0,0 +1,579 @@
+# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Daniel Krech nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+"""
+from xml.sax import make_parser
+from xml.sax.handler import ErrorHandler
+from xml.sax.saxutils import handler, quoteattr, escape
+from urlparse import urljoin, urldefrag
+
+from rdflib_.namespace import RDF, is_ncname
+from rdflib_.term import URIRef
+from rdflib_.term import BNode
+from rdflib_.term import Literal
+from rdflib_.exceptions import ParserError, Error
+from rdflib_.parser import Parser
+
+__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser']
+
+RDFNS = RDF
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
+# A mapping from unqualified terms to their qualified version.
+UNQUALIFIED = {"about" : RDF.about,
+ "ID" : RDF.ID,
+ "type" : RDF.type,
+ "resource": RDF.resource,
+ "parseType": RDF.parseType}
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
+CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
+SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
+OLD_TERMS = [
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
+
+NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
+NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
+
+PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
+PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
+PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
+
+XMLNS = "http://www.w3.org/XML/1998/namespace"
+BASE = (XMLNS, "base")
+LANG = (XMLNS, "lang")
+
+
+class BagID(URIRef):
+ __slots__ = ['li']
+ def __init__(self, val):
+ super(URIRef, self).__init__(val)
+ self.li = 0
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class ElementHandler(object):
+ __slots__ = ['start', 'char', 'end', 'li', 'id',
+ 'base', 'subject', 'predicate', 'object',
+ 'list', 'language', 'datatype', 'declared', 'data']
+ def __init__(self):
+ self.start = None
+ self.char = None
+ self.end = None
+ self.li = 0
+ self.id = None
+ self.base = None
+ self.subject = None
+ self.object = None
+ self.list = None
+ self.language = None
+ self.datatype = None
+ self.declared = None
+ self.data = None
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class RDFXMLHandler(handler.ContentHandler):
+
+ def __init__(self, store):
+ self.store = store
+ self.preserve_bnode_ids = False
+ self.reset()
+
+ def reset(self):
+ document_element = ElementHandler()
+ document_element.start = self.document_element_start
+ document_element.end = lambda name, qname: None
+ self.stack = [None, document_element,]
+ self.ids = {} # remember IDs we have already seen
+ self.bnode = {}
+ self._ns_contexts = [{}] # contains uri -> prefix dicts
+ self._current_context = self._ns_contexts[-1]
+
+ # ContentHandler methods
+
+ def setDocumentLocator(self, locator):
+ self.locator = locator
+
+ def startDocument(self):
+ pass
+
+ def startPrefixMapping(self, prefix, namespace):
+ self._ns_contexts.append(self._current_context.copy())
+ self._current_context[namespace] = prefix
+ self.store.bind(prefix, URIRef(namespace), override=False)
+
+ def endPrefixMapping(self, prefix):
+ self._current_context = self._ns_contexts[-1]
+ del self._ns_contexts[-1]
+
+ def startElementNS(self, name, qname, attrs):
+ stack = self.stack
+ stack.append(ElementHandler())
+ current = self.current
+ parent = self.parent
+ base = attrs.get(BASE, None)
+ if base is not None:
+ base, frag = urldefrag(base)
+ if parent and parent.base:
+ base = urljoin(parent.base, base)
+ else:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base = urljoin(systemId, base)
+ else:
+ if parent:
+ base = parent.base
+ if base is None:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base, frag = urldefrag(systemId)
+ current.base = base
+ language = attrs.get(LANG, None)
+ if language is None:
+ if parent:
+ language = parent.language
+ current.language = language
+ current.start(name, qname, attrs)
+
+ def endElementNS(self, name, qname):
+ self.current.end(name, qname)
+ self.stack.pop()
+
+ def characters(self, content):
+ char = self.current.char
+ if char:
+ char(content)
+
+ def ignorableWhitespace(self, content):
+ pass
+
+ def processingInstruction(self, target, data):
+ pass
+
+ def add_reified(self, sid, (s, p, o)):
+ self.store.add((sid, RDF.type, RDF.Statement))
+ self.store.add((sid, RDF.subject, s))
+ self.store.add((sid, RDF.predicate, p))
+ self.store.add((sid, RDF.object, o))
+
+ def error(self, message):
+ locator = self.locator
+ info = "%s:%s:%s: " % (locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber())
+ raise ParserError(info + message)
+
+ def get_current(self):
+ return self.stack[-2]
+ # Create a read only property called current so that self.current
+ # give the current element handler.
+ current = property(get_current)
+
+ def get_next(self):
+ return self.stack[-1]
+ # Create a read only property that gives the element handler to be
+ # used for the next element.
+ next = property(get_next)
+
+ def get_parent(self):
+ return self.stack[-3]
+ # Create a read only property that gives the current parent
+ # element handler
+ parent = property(get_parent)
+
+ def absolutize(self, uri):
+ result = urljoin(self.current.base, uri, allow_fragments=1)
+ if uri and uri[-1]=="#" and result[-1]!="#":
+ result = "%s#" % result
+ return URIRef(result)
+
+ def convert(self, name, qname, attrs):
+ if name[0] is None:
+ name = URIRef(name[1])
+ else:
+ name = URIRef("".join(name))
+ atts = {}
+ for (n, v) in attrs.items(): #attrs._attrs.iteritems(): #
+ if n[0] is None:
+ att = URIRef(n[1])
+ else:
+ att = URIRef("".join(n))
+ if att.startswith(XMLNS) or att[0:3].lower()=="xml":
+ pass
+ elif att in UNQUALIFIED:
+ #if not RDFNS[att] in atts:
+ atts[RDFNS[att]] = v
+ else:
+ atts[URIRef(att)] = v
+ return name, atts
+
+ def document_element_start(self, name, qname, attrs):
+ if name[0] and URIRef("".join(name)) == RDF.RDF:
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ self.node_element_start(name, qname, attrs)
+ #self.current.end = self.node_element_end
+ # TODO... set end to something that sets start such that
+ # another element will cause error
+
+
+ def node_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+
+ if name in NODE_ELEMENT_EXCEPTIONS:
+ self.error("Invalid node element URI: %s" % name)
+
+ if RDF.ID in atts:
+ if RDF.about in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+
+ id = atts[RDF.ID]
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a valid NCName: %s" % id)
+ subject = absolutize("#%s" % id)
+ if subject in self.ids:
+ self.error("two elements cannot use the same ID: '%s'" % subject)
+ self.ids[subject] = 1 # IDs can only appear once within a document
+ elif RDF.nodeID in atts:
+ if RDF.ID in atts or RDF.about in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ nodeID = atts[RDF.nodeID]
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ subject = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ else:
+ subject = BNode(nodeID)
+ elif RDF.about in atts:
+ if RDF.ID in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ subject = absolutize(atts[RDF.about])
+ else:
+ subject = BNode()
+
+ if name!=RDF.Description: # S1
+ self.store.add((subject, RDF.type, absolutize(name)))
+
+ language = current.language
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ elif att==RDF.type: #S2
+ predicate = RDF.type
+ object = absolutize(atts[RDF.type])
+ elif att in NODE_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
+ self.error("Invalid property attribute URI: %s" % att)
+ continue # for when error does not throw an exception
+ else:
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ self.store.add((subject, predicate, object))
+
+ current.subject = subject
+
+
+ def node_element_end(self, name, qname):
+ self.parent.object = self.current.subject
+
+ def property_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ object = None
+ current.data = None
+ current.list = None
+
+ if not name.startswith(str(RDFNS)):
+ current.predicate = absolutize(name)
+ elif name==RDF.li:
+ current.predicate = current.next_li()
+ elif name in PROPERTY_ELEMENT_EXCEPTIONS:
+ self.error("Invalid property element URI: %s" % name)
+ else:
+ current.predicate = absolutize(name)
+
+ id = atts.get(RDF.ID, None)
+ if id is not None:
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a value NCName: %s" % id)
+ current.id = absolutize("#%s" % id)
+ else:
+ current.id = None
+
+ resource = atts.get(RDF.resource, None)
+ nodeID = atts.get(RDF.nodeID, None)
+ parse_type = atts.get(RDF.parseType, None)
+ if resource is not None and nodeID is not None:
+ self.error("Property element cannot have both rdf:nodeID and rdf:resource")
+ if resource is not None:
+ object = absolutize(resource)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ elif nodeID is not None:
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ object = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ object = subject
+ else:
+ object = subject = BNode(nodeID)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ if parse_type is not None:
+ for att in atts:
+ if att!=RDF.parseType and att!=RDF.ID:
+ self.error("Property attr '%s' now allowed here" % att)
+ if parse_type=="Resource":
+ current.subject = object = BNode()
+ current.char = self.property_element_char
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+ elif parse_type=="Collection":
+ current.char = None
+ object = current.list = RDF.nil #BNode()#self.parent.subject
+ next.start = self.node_element_start
+ next.end = self.list_node_element_end
+ else: #if parse_type=="Literal":
+ # All other values are treated as Literal
+ # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
+ object = Literal("", datatype=RDF.XMLLiteral)
+ current.char = self.literal_element_char
+ current.declared = {}
+ next.start = self.literal_element_start
+ next.char = self.literal_element_char
+ next.end = self.literal_element_end
+ current.object = object
+ return
+ else:
+ object = None
+ current.char = self.property_element_char
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+
+ datatype = current.datatype = atts.get(RDF.datatype, None)
+ language = current.language
+ if datatype is not None:
+ # TODO: check that there are no atts other than datatype and id
+ datatype = absolutize(datatype)
+ else:
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ elif att in PROPERTY_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
+ self.error("""Invalid property attribute URI: %s""" % att)
+ else:
+ predicate = absolutize(att)
+
+ if att==RDF.type:
+ o = URIRef(atts[att])
+ else:
+ if datatype is not None:
+ language = None
+ o = Literal(atts[att], language, datatype)
+
+ if object is None:
+ object = BNode()
+ self.store.add((object, predicate, o))
+ if object is None:
+ current.data = ""
+ current.object = None
+ else:
+ current.data = None
+ current.object = object
+
+ def property_element_char(self, data):
+ current = self.current
+ if current.data is not None:
+ current.data += data
+
+ def property_element_end(self, name, qname):
+ current = self.current
+ if current.data is not None and current.object is None:
+ literalLang = current.language
+ if current.datatype is not None:
+ literalLang = None
+ current.object = Literal(current.data, literalLang, current.datatype)
+ current.data = None
+ if self.next.end==self.list_node_element_end:
+ if current.object!=RDF.nil:
+ self.store.add((current.list, RDF.rest, RDF.nil))
+ if current.object is not None:
+ self.store.add((self.parent.subject, current.predicate, current.object))
+ if current.id is not None:
+ self.add_reified(current.id, (self.parent.subject,
+ current.predicate, current.object))
+ current.subject = None
+
+ def list_node_element_end(self, name, qname):
+ current = self.current
+ if self.parent.list==RDF.nil:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.parent.list = list
+ self.store.add((self.parent.list, RDF.first, current.subject))
+ self.parent.object = list
+ self.parent.char = None
+ else:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.store.add((self.parent.list, RDF.rest, list))
+ self.store.add((list, RDF.first, current.subject))
+ self.parent.list = list
+
+ def literal_element_start(self, name, qname, attrs):
+ current = self.current
+ self.next.start = self.literal_element_start
+ self.next.char = self.literal_element_char
+ self.next.end = self.literal_element_end
+ current.declared = self.parent.declared.copy()
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ current.object = "<%s:%s" % (prefix, name[1])
+ else:
+ current.object = "<%s" % name[1]
+ if not name[0] in current.declared:
+ current.declared[name[0]] = prefix
+ if prefix:
+ current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
+ else:
+ current.object += (' xmlns="%s"' % name[0])
+ else:
+ current.object = "<%s" % name[1]
+
+ for (name, value) in attrs.items():
+ if name[0]:
+ if not name[0] in current.declared:
+ current.declared[name[0]] = self._current_context[name[0]]
+ name = current.declared[name[0]] + ":" + name[1]
+ else:
+ name = name[1]
+ current.object += (' %s=%s' % (name, quoteattr(value)))
+ current.object += ">"
+
+ def literal_element_char(self, data):
+ self.current.object += escape(data)
+
+ def literal_element_end(self, name, qname):
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ end = u"</%s:%s>" % (prefix, name[1])
+ else:
+ end = u"</%s>" % name[1]
+ else:
+ end = u"</%s>" % name[1]
+ self.parent.object += self.current.object + end
+
+
+def create_parser(target, store):
+ parser = make_parser()
+ try:
+ # Workaround for bug in expatreader.py. Needed when
+ # expatreader is trying to guess a prefix.
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
+ except AttributeError:
+ pass # Not present in Jython (at least)
+ parser.setFeature(handler.feature_namespaces, 1)
+ rdfxml = RDFXMLHandler(store)
+ rdfxml.setDocumentLocator(target)
+ #rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
+ parser.setContentHandler(rdfxml)
+ parser.setErrorHandler(ErrorHandler())
+ return parser
+
+
+class RDFXMLParser(Parser):
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, sink, **args):
+ self._parser = create_parser(source, sink)
+ content_handler = self._parser.getContentHandler()
+ preserve_bnode_ids = args.get("preserve_bnode_ids", None)
+ if preserve_bnode_ids is not None:
+ content_handler.preserve_bnode_ids = preserve_bnode_ids
+ # We're only using it once now
+ #content_handler.reset()
+ #self._parser.reset()
+ self._parser.parse(source)
+
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~
new file mode 100644
index 0000000..00e8d6a
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~
@@ -0,0 +1,579 @@
+# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Daniel Krech nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+"""
+from xml.sax import make_parser
+from xml.sax.handler import ErrorHandler
+from xml.sax.saxutils import handler, quoteattr, escape
+from urlparse import urljoin, urldefrag
+
+from rdflib.namespace import RDF, is_ncname
+from rdflib.term import URIRef
+from rdflib.term import BNode
+from rdflib.term import Literal
+from rdflib.exceptions import ParserError, Error
+from rdflib.parser import Parser
+
+__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser']
+
+RDFNS = RDF
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
+# A mapping from unqualified terms to their qualified version.
+UNQUALIFIED = {"about" : RDF.about,
+ "ID" : RDF.ID,
+ "type" : RDF.type,
+ "resource": RDF.resource,
+ "parseType": RDF.parseType}
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
+CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
+SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
+OLD_TERMS = [
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
+ URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
+
+NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
+NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
+
+PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
+PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
+PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
+
+XMLNS = "http://www.w3.org/XML/1998/namespace"
+BASE = (XMLNS, "base")
+LANG = (XMLNS, "lang")
+
+
+class BagID(URIRef):
+ __slots__ = ['li']
+ def __init__(self, val):
+ super(URIRef, self).__init__(val)
+ self.li = 0
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class ElementHandler(object):
+ __slots__ = ['start', 'char', 'end', 'li', 'id',
+ 'base', 'subject', 'predicate', 'object',
+ 'list', 'language', 'datatype', 'declared', 'data']
+ def __init__(self):
+ self.start = None
+ self.char = None
+ self.end = None
+ self.li = 0
+ self.id = None
+ self.base = None
+ self.subject = None
+ self.object = None
+ self.list = None
+ self.language = None
+ self.datatype = None
+ self.declared = None
+ self.data = None
+
+ def next_li(self):
+ self.li += 1
+ return RDFNS[self.li]
+
+
+class RDFXMLHandler(handler.ContentHandler):
+
+ def __init__(self, store):
+ self.store = store
+ self.preserve_bnode_ids = False
+ self.reset()
+
+ def reset(self):
+ document_element = ElementHandler()
+ document_element.start = self.document_element_start
+ document_element.end = lambda name, qname: None
+ self.stack = [None, document_element,]
+ self.ids = {} # remember IDs we have already seen
+ self.bnode = {}
+ self._ns_contexts = [{}] # contains uri -> prefix dicts
+ self._current_context = self._ns_contexts[-1]
+
+ # ContentHandler methods
+
+ def setDocumentLocator(self, locator):
+ self.locator = locator
+
+ def startDocument(self):
+ pass
+
+ def startPrefixMapping(self, prefix, namespace):
+ self._ns_contexts.append(self._current_context.copy())
+ self._current_context[namespace] = prefix
+ self.store.bind(prefix, URIRef(namespace), override=False)
+
+ def endPrefixMapping(self, prefix):
+ self._current_context = self._ns_contexts[-1]
+ del self._ns_contexts[-1]
+
+ def startElementNS(self, name, qname, attrs):
+ stack = self.stack
+ stack.append(ElementHandler())
+ current = self.current
+ parent = self.parent
+ base = attrs.get(BASE, None)
+ if base is not None:
+ base, frag = urldefrag(base)
+ if parent and parent.base:
+ base = urljoin(parent.base, base)
+ else:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base = urljoin(systemId, base)
+ else:
+ if parent:
+ base = parent.base
+ if base is None:
+ systemId = self.locator.getPublicId() or self.locator.getSystemId()
+ if systemId:
+ base, frag = urldefrag(systemId)
+ current.base = base
+ language = attrs.get(LANG, None)
+ if language is None:
+ if parent:
+ language = parent.language
+ current.language = language
+ current.start(name, qname, attrs)
+
+ def endElementNS(self, name, qname):
+ self.current.end(name, qname)
+ self.stack.pop()
+
+ def characters(self, content):
+ char = self.current.char
+ if char:
+ char(content)
+
+ def ignorableWhitespace(self, content):
+ pass
+
+ def processingInstruction(self, target, data):
+ pass
+
+ def add_reified(self, sid, (s, p, o)):
+ self.store.add((sid, RDF.type, RDF.Statement))
+ self.store.add((sid, RDF.subject, s))
+ self.store.add((sid, RDF.predicate, p))
+ self.store.add((sid, RDF.object, o))
+
+ def error(self, message):
+ locator = self.locator
+ info = "%s:%s:%s: " % (locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber())
+ raise ParserError(info + message)
+
+ def get_current(self):
+ return self.stack[-2]
+ # Create a read only property called current so that self.current
+ # give the current element handler.
+ current = property(get_current)
+
+ def get_next(self):
+ return self.stack[-1]
+ # Create a read only property that gives the element handler to be
+ # used for the next element.
+ next = property(get_next)
+
+ def get_parent(self):
+ return self.stack[-3]
+ # Create a read only property that gives the current parent
+ # element handler
+ parent = property(get_parent)
+
+ def absolutize(self, uri):
+ result = urljoin(self.current.base, uri, allow_fragments=1)
+ if uri and uri[-1]=="#" and result[-1]!="#":
+ result = "%s#" % result
+ return URIRef(result)
+
+ def convert(self, name, qname, attrs):
+ if name[0] is None:
+ name = URIRef(name[1])
+ else:
+ name = URIRef("".join(name))
+ atts = {}
+ for (n, v) in attrs.items(): #attrs._attrs.iteritems(): #
+ if n[0] is None:
+ att = URIRef(n[1])
+ else:
+ att = URIRef("".join(n))
+ if att.startswith(XMLNS) or att[0:3].lower()=="xml":
+ pass
+ elif att in UNQUALIFIED:
+ #if not RDFNS[att] in atts:
+ atts[RDFNS[att]] = v
+ else:
+ atts[URIRef(att)] = v
+ return name, atts
+
+ def document_element_start(self, name, qname, attrs):
+ if name[0] and URIRef("".join(name)) == RDF.RDF:
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ self.node_element_start(name, qname, attrs)
+ #self.current.end = self.node_element_end
+ # TODO... set end to something that sets start such that
+ # another element will cause error
+
+
+ def node_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+
+ if name in NODE_ELEMENT_EXCEPTIONS:
+ self.error("Invalid node element URI: %s" % name)
+
+ if RDF.ID in atts:
+ if RDF.about in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+
+ id = atts[RDF.ID]
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a valid NCName: %s" % id)
+ subject = absolutize("#%s" % id)
+ if subject in self.ids:
+ self.error("two elements cannot use the same ID: '%s'" % subject)
+ self.ids[subject] = 1 # IDs can only appear once within a document
+ elif RDF.nodeID in atts:
+ if RDF.ID in atts or RDF.about in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ nodeID = atts[RDF.nodeID]
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ subject = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ else:
+ subject = BNode(nodeID)
+ elif RDF.about in atts:
+ if RDF.ID in atts or RDF.nodeID in atts:
+ self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
+ subject = absolutize(atts[RDF.about])
+ else:
+ subject = BNode()
+
+ if name!=RDF.Description: # S1
+ self.store.add((subject, RDF.type, absolutize(name)))
+
+ language = current.language
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ elif att==RDF.type: #S2
+ predicate = RDF.type
+ object = absolutize(atts[RDF.type])
+ elif att in NODE_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
+ self.error("Invalid property attribute URI: %s" % att)
+ continue # for when error does not throw an exception
+ else:
+ predicate = absolutize(att)
+ try:
+ object = Literal(atts[att], language)
+ except Error, e:
+ self.error(e.msg)
+ self.store.add((subject, predicate, object))
+
+ current.subject = subject
+
+
+ def node_element_end(self, name, qname):
+ self.parent.object = self.current.subject
+
+ def property_element_start(self, name, qname, attrs):
+ name, atts = self.convert(name, qname, attrs)
+ current = self.current
+ absolutize = self.absolutize
+
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, 'next')
+ object = None
+ current.data = None
+ current.list = None
+
+ if not name.startswith(str(RDFNS)):
+ current.predicate = absolutize(name)
+ elif name==RDF.li:
+ current.predicate = current.next_li()
+ elif name in PROPERTY_ELEMENT_EXCEPTIONS:
+ self.error("Invalid property element URI: %s" % name)
+ else:
+ current.predicate = absolutize(name)
+
+ id = atts.get(RDF.ID, None)
+ if id is not None:
+ if not is_ncname(id):
+ self.error("rdf:ID value is not a value NCName: %s" % id)
+ current.id = absolutize("#%s" % id)
+ else:
+ current.id = None
+
+ resource = atts.get(RDF.resource, None)
+ nodeID = atts.get(RDF.nodeID, None)
+ parse_type = atts.get(RDF.parseType, None)
+ if resource is not None and nodeID is not None:
+ self.error("Property element cannot have both rdf:nodeID and rdf:resource")
+ if resource is not None:
+ object = absolutize(resource)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ elif nodeID is not None:
+ if not is_ncname(nodeID):
+ self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
+ if self.preserve_bnode_ids is False:
+ if nodeID in self.bnode:
+ object = self.bnode[nodeID]
+ else:
+ subject = BNode()
+ self.bnode[nodeID] = subject
+ object = subject
+ else:
+ object = subject = BNode(nodeID)
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+ else:
+ if parse_type is not None:
+ for att in atts:
+ if att!=RDF.parseType and att!=RDF.ID:
+ self.error("Property attr '%s' now allowed here" % att)
+ if parse_type=="Resource":
+ current.subject = object = BNode()
+ current.char = self.property_element_char
+ next.start = self.property_element_start
+ next.end = self.property_element_end
+ elif parse_type=="Collection":
+ current.char = None
+ object = current.list = RDF.nil #BNode()#self.parent.subject
+ next.start = self.node_element_start
+ next.end = self.list_node_element_end
+ else: #if parse_type=="Literal":
+ # All other values are treated as Literal
+ # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
+ object = Literal("", datatype=RDF.XMLLiteral)
+ current.char = self.literal_element_char
+ current.declared = {}
+ next.start = self.literal_element_start
+ next.char = self.literal_element_char
+ next.end = self.literal_element_end
+ current.object = object
+ return
+ else:
+ object = None
+ current.char = self.property_element_char
+ next.start = self.node_element_start
+ next.end = self.node_element_end
+
+ datatype = current.datatype = atts.get(RDF.datatype, None)
+ language = current.language
+ if datatype is not None:
+ # TODO: check that there are no atts other than datatype and id
+ datatype = absolutize(datatype)
+ else:
+ for att in atts:
+ if not att.startswith(str(RDFNS)):
+ predicate = absolutize(att)
+ elif att in PROPERTY_ELEMENT_ATTRIBUTES:
+ continue
+ elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
+ self.error("""Invalid property attribute URI: %s""" % att)
+ else:
+ predicate = absolutize(att)
+
+ if att==RDF.type:
+ o = URIRef(atts[att])
+ else:
+ if datatype is not None:
+ language = None
+ o = Literal(atts[att], language, datatype)
+
+ if object is None:
+ object = BNode()
+ self.store.add((object, predicate, o))
+ if object is None:
+ current.data = ""
+ current.object = None
+ else:
+ current.data = None
+ current.object = object
+
+ def property_element_char(self, data):
+ current = self.current
+ if current.data is not None:
+ current.data += data
+
+ def property_element_end(self, name, qname):
+ current = self.current
+ if current.data is not None and current.object is None:
+ literalLang = current.language
+ if current.datatype is not None:
+ literalLang = None
+ current.object = Literal(current.data, literalLang, current.datatype)
+ current.data = None
+ if self.next.end==self.list_node_element_end:
+ if current.object!=RDF.nil:
+ self.store.add((current.list, RDF.rest, RDF.nil))
+ if current.object is not None:
+ self.store.add((self.parent.subject, current.predicate, current.object))
+ if current.id is not None:
+ self.add_reified(current.id, (self.parent.subject,
+ current.predicate, current.object))
+ current.subject = None
+
+ def list_node_element_end(self, name, qname):
+ current = self.current
+ if self.parent.list==RDF.nil:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.parent.list = list
+ self.store.add((self.parent.list, RDF.first, current.subject))
+ self.parent.object = list
+ self.parent.char = None
+ else:
+ list = BNode()
+ # Removed between 20030123 and 20030905
+ #self.store.add((list, RDF.type, LIST))
+ self.store.add((self.parent.list, RDF.rest, list))
+ self.store.add((list, RDF.first, current.subject))
+ self.parent.list = list
+
+ def literal_element_start(self, name, qname, attrs):
+ current = self.current
+ self.next.start = self.literal_element_start
+ self.next.char = self.literal_element_char
+ self.next.end = self.literal_element_end
+ current.declared = self.parent.declared.copy()
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ current.object = "<%s:%s" % (prefix, name[1])
+ else:
+ current.object = "<%s" % name[1]
+ if not name[0] in current.declared:
+ current.declared[name[0]] = prefix
+ if prefix:
+ current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
+ else:
+ current.object += (' xmlns="%s"' % name[0])
+ else:
+ current.object = "<%s" % name[1]
+
+ for (name, value) in attrs.items():
+ if name[0]:
+ if not name[0] in current.declared:
+ current.declared[name[0]] = self._current_context[name[0]]
+ name = current.declared[name[0]] + ":" + name[1]
+ else:
+ name = name[1]
+ current.object += (' %s=%s' % (name, quoteattr(value)))
+ current.object += ">"
+
+ def literal_element_char(self, data):
+ self.current.object += escape(data)
+
+ def literal_element_end(self, name, qname):
+ if name[0]:
+ prefix = self._current_context[name[0]]
+ if prefix:
+ end = u"</%s:%s>" % (prefix, name[1])
+ else:
+ end = u"</%s>" % name[1]
+ else:
+ end = u"</%s>" % name[1]
+ self.parent.object += self.current.object + end
+
+
+def create_parser(target, store):
+ parser = make_parser()
+ try:
+ # Workaround for bug in expatreader.py. Needed when
+ # expatreader is trying to guess a prefix.
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
+ except AttributeError:
+ pass # Not present in Jython (at least)
+ parser.setFeature(handler.feature_namespaces, 1)
+ rdfxml = RDFXMLHandler(store)
+ rdfxml.setDocumentLocator(target)
+ #rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
+ parser.setContentHandler(rdfxml)
+ parser.setErrorHandler(ErrorHandler())
+ return parser
+
+
+class RDFXMLParser(Parser):
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, sink, **args):
+ self._parser = create_parser(source, sink)
+ content_handler = self._parser.getContentHandler()
+ preserve_bnode_ids = args.get("preserve_bnode_ids", None)
+ if preserve_bnode_ids is not None:
+ content_handler.preserve_bnode_ids = preserve_bnode_ids
+ # We're only using it once now
+ #content_handler.reset()
+ #self._parser.reset()
+ self._parser.parse(source)
+
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py
new file mode 100644
index 0000000..9a1c3ba
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py
@@ -0,0 +1,286 @@
+# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Daniel Krech nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+"""
+from rdflib_.namespace import Namespace
+from rdflib_.term import URIRef
+from rdflib_.term import BNode
+from rdflib_.term import Literal
+from rdflib_.graph import Graph, ConjunctiveGraph
+from rdflib_.exceptions import ParserError
+from rdflib_.parser import Parser
+
+from xml.sax.saxutils import handler
+from xml.sax import make_parser
+from xml.sax.handler import ErrorHandler
+
+__all__ = ['create_parser', 'TriXHandler', 'TriXParser']
+
+
+TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
+XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
+
+class TriXHandler(handler.ContentHandler):
+ """A SAX handler for TriX. See http://sw.nokia.com/trix/"""
+
+ def __init__(self, store):
+ self.store = store
+ self.preserve_bnode_ids = False
+ self.reset()
+
+ def reset(self):
+ self.bnode = {}
+ self.graph=self.store
+ self.triple=None
+ self.state=0
+ self.lang=None
+ self.datatype=None
+
+ # ContentHandler methods
+
+ def setDocumentLocator(self, locator):
+ self.locator = locator
+
+ def startDocument(self):
+ pass
+
+ def startPrefixMapping(self, prefix, namespace):
+ pass
+
+ def endPrefixMapping(self, prefix):
+ pass
+
+ def startElementNS(self, name, qname, attrs):
+
+ if name[0]!=str(TRIXNS):
+ self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS))
+
+ if name[1]=="TriX":
+ if self.state==0:
+ self.state=1
+ else:
+ self.error("Unexpected TriX element")
+
+ elif name[1]=="graph":
+ if self.state==1:
+ self.state=2
+ else:
+ self.error("Unexpected graph element")
+
+ elif name[1]=="uri":
+ if self.state==2:
+ # the context uri
+ self.state=3
+ elif self.state==4:
+ # part of a triple
+ pass
+ else:
+ self.error("Unexpected uri element")
+
+ elif name[1]=="triple":
+ if self.state==2:
+ if self.graph==None:
+ # anonymous graph, create one with random bnode id
+ self.graph=Graph(store=self.store.store)
+ # start of a triple
+ self.triple=[]
+ self.state=4
+ else:
+ self.error("Unexpected triple element")
+
+ elif name[1]=="typedLiteral":
+ if self.state==4:
+ # part of triple
+ self.lang=None
+ self.datatype=None
+
+ try:
+ self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
+ except:
+ # language not required - ignore
+ pass
+ try:
+ self.datatype=attrs.getValueByQName(u"datatype")
+ except KeyError:
+ self.error("No required attribute 'datatype'")
+ else:
+ self.error("Unexpected typedLiteral element")
+
+ elif name[1]=="plainLiteral":
+ if self.state==4:
+ # part of triple
+ self.lang=None
+ self.datatype=None
+ try:
+ self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
+ except:
+ # language not required - ignore
+ pass
+
+ else:
+ self.error("Unexpected plainLiteral element")
+
+ elif name[1]=="id":
+ if self.state==2:
+ # the context uri
+ self.state=3
+
+ elif self.state==4:
+ # part of triple
+ pass
+ else:
+ self.error("Unexpected id element")
+
+ else:
+ self.error("Unknown element %s in TriX namespace"%name[1])
+
+ self.chars=""
+
+
+ def endElementNS(self, name, qname):
+ if name[0]!=str(TRIXNS):
+ self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS))
+
+ if name[1]=="uri":
+ if self.state==3:
+ self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip()))
+ self.state=2
+ elif self.state==4:
+ self.triple+=[URIRef(self.chars.strip())]
+ else:
+ self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="id":
+ if self.state==3:
+ self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip()))
+ self.state=2
+ elif self.state==4:
+ self.triple+=[self.get_bnode(self.chars.strip())]
+ else:
+ self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="plainLiteral" or name[1]=="typedLiteral":
+ if self.state==4:
+ self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)]
+ else:
+ self.error("This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="triple":
+ if self.state==4:
+ if len(self.triple)!=3:
+ self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple))
+
+ self.graph.add(self.triple)
+ #self.store.store.add(self.triple,context=self.graph)
+ #self.store.addN([self.triple+[self.graph]])
+ self.state=2
+ else:
+ self.error("This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="graph":
+ self.graph=None
+ self.state=1
+
+ elif name[1]=="TriX":
+ self.state=0
+
+ else:
+ self.error("Unexpected close element")
+
+
+ def get_bnode(self,label):
+ if self.preserve_bnode_ids:
+ bn=BNode(label)
+ else:
+ if label in self.bnode:
+ bn=self.bnode[label]
+ else:
+ bn=BNode(label)
+ self.bnode[label]=bn
+ return bn
+
+
+ def characters(self, content):
+ self.chars+=content
+
+
+ def ignorableWhitespace(self, content):
+ pass
+
+ def processingInstruction(self, target, data):
+ pass
+
+
+ def error(self, message):
+ locator = self.locator
+ info = "%s:%s:%s: " % (locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber())
+ raise ParserError(info + message)
+
+
+def create_parser(store):
+ parser = make_parser()
+ try:
+ # Workaround for bug in expatreader.py. Needed when
+ # expatreader is trying to guess a prefix.
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
+ except AttributeError:
+ pass # Not present in Jython (at least)
+ parser.setFeature(handler.feature_namespaces, 1)
+ trix = TriXHandler(store)
+ parser.setContentHandler(trix)
+ parser.setErrorHandler(ErrorHandler())
+ return parser
+
+
+class TriXParser(Parser):
+ """A parser for TriX. See http://sw.nokia.com/trix/"""
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, sink, **args):
+ assert sink.store.context_aware
+ g=ConjunctiveGraph(store=sink.store)
+
+ self._parser = create_parser(g)
+ content_handler = self._parser.getContentHandler()
+ preserve_bnode_ids = args.get("preserve_bnode_ids", None)
+ if preserve_bnode_ids is not None:
+ content_handler.preserve_bnode_ids = preserve_bnode_ids
+ # We're only using it once now
+ #content_handler.reset()
+ #self._parser.reset()
+ self._parser.parse(source)
+
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~
new file mode 100644
index 0000000..0c2e708
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~
@@ -0,0 +1,286 @@
+# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Daniel Krech nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+"""
+from rdflib.namespace import Namespace
+from rdflib.term import URIRef
+from rdflib.term import BNode
+from rdflib.term import Literal
+from rdflib.graph import Graph, ConjunctiveGraph
+from rdflib.exceptions import ParserError
+from rdflib.parser import Parser
+
+from xml.sax.saxutils import handler
+from xml.sax import make_parser
+from xml.sax.handler import ErrorHandler
+
+__all__ = ['create_parser', 'TriXHandler', 'TriXParser']
+
+
+TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
+XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
+
+class TriXHandler(handler.ContentHandler):
+ """A SAX handler for TriX. See http://sw.nokia.com/trix/"""
+
+ def __init__(self, store):
+ self.store = store
+ self.preserve_bnode_ids = False
+ self.reset()
+
+ def reset(self):
+ self.bnode = {}
+ self.graph=self.store
+ self.triple=None
+ self.state=0
+ self.lang=None
+ self.datatype=None
+
+ # ContentHandler methods
+
+ def setDocumentLocator(self, locator):
+ self.locator = locator
+
+ def startDocument(self):
+ pass
+
+ def startPrefixMapping(self, prefix, namespace):
+ pass
+
+ def endPrefixMapping(self, prefix):
+ pass
+
+ def startElementNS(self, name, qname, attrs):
+
+ if name[0]!=str(TRIXNS):
+ self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS))
+
+ if name[1]=="TriX":
+ if self.state==0:
+ self.state=1
+ else:
+ self.error("Unexpected TriX element")
+
+ elif name[1]=="graph":
+ if self.state==1:
+ self.state=2
+ else:
+ self.error("Unexpected graph element")
+
+ elif name[1]=="uri":
+ if self.state==2:
+ # the context uri
+ self.state=3
+ elif self.state==4:
+ # part of a triple
+ pass
+ else:
+ self.error("Unexpected uri element")
+
+ elif name[1]=="triple":
+ if self.state==2:
+ if self.graph==None:
+ # anonymous graph, create one with random bnode id
+ self.graph=Graph(store=self.store.store)
+ # start of a triple
+ self.triple=[]
+ self.state=4
+ else:
+ self.error("Unexpected triple element")
+
+ elif name[1]=="typedLiteral":
+ if self.state==4:
+ # part of triple
+ self.lang=None
+ self.datatype=None
+
+ try:
+ self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
+ except:
+ # language not required - ignore
+ pass
+ try:
+ self.datatype=attrs.getValueByQName(u"datatype")
+ except KeyError:
+ self.error("No required attribute 'datatype'")
+ else:
+ self.error("Unexpected typedLiteral element")
+
+ elif name[1]=="plainLiteral":
+ if self.state==4:
+ # part of triple
+ self.lang=None
+ self.datatype=None
+ try:
+ self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
+ except:
+ # language not required - ignore
+ pass
+
+ else:
+ self.error("Unexpected plainLiteral element")
+
+ elif name[1]=="id":
+ if self.state==2:
+ # the context uri
+ self.state=3
+
+ elif self.state==4:
+ # part of triple
+ pass
+ else:
+ self.error("Unexpected id element")
+
+ else:
+ self.error("Unknown element %s in TriX namespace"%name[1])
+
+ self.chars=""
+
+
+ def endElementNS(self, name, qname):
+ if name[0]!=str(TRIXNS):
+ self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS))
+
+ if name[1]=="uri":
+ if self.state==3:
+ self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip()))
+ self.state=2
+ elif self.state==4:
+ self.triple+=[URIRef(self.chars.strip())]
+ else:
+ self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="id":
+ if self.state==3:
+ self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip()))
+ self.state=2
+ elif self.state==4:
+ self.triple+=[self.get_bnode(self.chars.strip())]
+ else:
+ self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="plainLiteral" or name[1]=="typedLiteral":
+ if self.state==4:
+ self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)]
+ else:
+ self.error("This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="triple":
+ if self.state==4:
+ if len(self.triple)!=3:
+ self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple))
+
+ self.graph.add(self.triple)
+ #self.store.store.add(self.triple,context=self.graph)
+ #self.store.addN([self.triple+[self.graph]])
+ self.state=2
+ else:
+ self.error("This should never happen if the SAX parser ensures XML syntax correctness")
+
+ elif name[1]=="graph":
+ self.graph=None
+ self.state=1
+
+ elif name[1]=="TriX":
+ self.state=0
+
+ else:
+ self.error("Unexpected close element")
+
+
+ def get_bnode(self,label):
+ if self.preserve_bnode_ids:
+ bn=BNode(label)
+ else:
+ if label in self.bnode:
+ bn=self.bnode[label]
+ else:
+ bn=BNode(label)
+ self.bnode[label]=bn
+ return bn
+
+
+ def characters(self, content):
+ self.chars+=content
+
+
+ def ignorableWhitespace(self, content):
+ pass
+
+ def processingInstruction(self, target, data):
+ pass
+
+
+ def error(self, message):
+ locator = self.locator
+ info = "%s:%s:%s: " % (locator.getSystemId(),
+ locator.getLineNumber(), locator.getColumnNumber())
+ raise ParserError(info + message)
+
+
+def create_parser(store):
+ parser = make_parser()
+ try:
+ # Workaround for bug in expatreader.py. Needed when
+ # expatreader is trying to guess a prefix.
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
+ except AttributeError:
+ pass # Not present in Jython (at least)
+ parser.setFeature(handler.feature_namespaces, 1)
+ trix = TriXHandler(store)
+ parser.setContentHandler(trix)
+ parser.setErrorHandler(ErrorHandler())
+ return parser
+
+
+class TriXParser(Parser):
+ """A parser for TriX. See http://sw.nokia.com/trix/"""
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, sink, **args):
+ assert sink.store.context_aware
+ g=ConjunctiveGraph(store=sink.store)
+
+ self._parser = create_parser(g)
+ content_handler = self._parser.getContentHandler()
+ preserve_bnode_ids = args.get("preserve_bnode_ids", None)
+ if preserve_bnode_ids is not None:
+ content_handler.preserve_bnode_ids = preserve_bnode_ids
+ # We're only using it once now
+ #content_handler.reset()
+ #self._parser.reset()
+ self._parser.parse(source)
+
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py
new file mode 100644
index 0000000..c7177c1
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py
@@ -0,0 +1,123 @@
+"""
+Notation 3 (N3) RDF graph serializer for RDFLib.
+"""
+from rdflib_.graph import Graph
+from rdflib_.namespace import Namespace, OWL
+from rdflib_.plugins.serializers.turtle import (TurtleSerializer,
+ SUBJECT, VERB, OBJECT)
+
+__all__ = ['N3Serializer']
+
+SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
+
+
+class N3Serializer(TurtleSerializer):
+
+ short_name = "n3"
+
+ def __init__(self, store, parent=None):
+ super(N3Serializer, self).__init__(store)
+ self.keywords.update({
+ OWL.sameAs: '=',
+ SWAP_LOG.implies: '=>'
+ })
+ self.parent = parent
+
+ def reset(self):
+ super(N3Serializer, self).reset()
+ self._stores = {}
+
+ def subjectDone(self, subject):
+ super(N3Serializer, self).subjectDone(subject)
+ if self.parent:
+ self.parent.subjectDone(subject)
+
+ def isDone(self, subject):
+ return (super(N3Serializer, self).isDone(subject)
+ and (not self.parent or self.parent.isDone(subject)))
+
+ def startDocument(self):
+ super(N3Serializer, self).startDocument()
+ #if not isinstance(self.store, N3Store):
+ # return
+ #
+ #all_list = [self.label(var) for var in
+ # self.store.get_universals(recurse=False)]
+ #all_list.sort()
+ #some_list = [self.label(var) for var in
+ # self.store.get_existentials(recurse=False)]
+ #some_list.sort()
+ #
+ #for var in all_list:
+ # self.write('\n'+self.indent()+'@forAll %s. '%var)
+ #for var in some_list:
+ # self.write('\n'+self.indent()+'@forSome %s. '%var)
+ #
+ #if (len(all_list) + len(some_list)) > 0:
+ # self.write('\n')
+
+ def endDocument(self):
+ if not self.parent:
+ super(N3Serializer, self).endDocument()
+
+ def indent(self, modifier=0):
+ indent = super(N3Serializer, self).indent(modifier)
+ if self.parent is not None:
+ indent += self.parent.indent()#modifier)
+ return indent
+
+ def preprocessTriple(self, triple):
+ super(N3Serializer, self).preprocessTriple(triple)
+ if isinstance(triple[0], Graph):
+ for t in triple[0]:
+ self.preprocessTriple(t)
+ if isinstance(triple[2], Graph):
+ for t in triple[2]:
+ self.preprocessTriple(t)
+
+ def getQName(self, uri, gen_prefix=True):
+ qname = None
+ if self.parent is not None:
+ qname = self.parent.getQName(uri, gen_prefix)
+ if qname is None:
+ qname = super(N3Serializer, self).getQName(uri, gen_prefix)
+ return qname
+
+ def statement(self, subject):
+ self.subjectDone(subject)
+ properties = self.buildPredicateHash(subject)
+ if len(properties) == 0:
+ return False
+ return (self.s_clause(subject)
+ or super(N3Serializer, self).statement(subject))
+
+ def path(self, node, position, newline=False):
+ if not self.p_clause(node, position):
+ super(N3Serializer, self).path(node, position, newline)
+
+ def s_clause(self, subject):
+ if isinstance(subject, Graph):
+ self.write('\n'+self.indent())
+ self.p_clause(subject, SUBJECT)
+ self.predicateList(subject)
+ self.write(' .')
+ return True
+ else:
+ return False
+
+ def p_clause(self, node, position):
+ if isinstance(node, Graph):
+ self.subjectDone(node)
+ if position is OBJECT:
+ self.write(' ')
+ self.write('{')
+ self.depth += 1
+ serializer = N3Serializer(node, parent=self)
+ serializer.serialize(self.stream)
+ self.depth -= 1
+ self.write(self.indent()+'}')
+ return True
+ else:
+ return False
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~
new file mode 100644
index 0000000..63faf9d
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~
@@ -0,0 +1,123 @@
+"""
+Notation 3 (N3) RDF graph serializer for RDFLib.
+"""
+from rdflib.graph import Graph
+from rdflib.namespace import Namespace, OWL
+from rdflib.plugins.serializers.turtle import (TurtleSerializer,
+ SUBJECT, VERB, OBJECT)
+
+__all__ = ['N3Serializer']
+
+SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
+
+
+class N3Serializer(TurtleSerializer):
+
+ short_name = "n3"
+
+ def __init__(self, store, parent=None):
+ super(N3Serializer, self).__init__(store)
+ self.keywords.update({
+ OWL.sameAs: '=',
+ SWAP_LOG.implies: '=>'
+ })
+ self.parent = parent
+
+ def reset(self):
+ super(N3Serializer, self).reset()
+ self._stores = {}
+
+ def subjectDone(self, subject):
+ super(N3Serializer, self).subjectDone(subject)
+ if self.parent:
+ self.parent.subjectDone(subject)
+
+ def isDone(self, subject):
+ return (super(N3Serializer, self).isDone(subject)
+ and (not self.parent or self.parent.isDone(subject)))
+
+ def startDocument(self):
+ super(N3Serializer, self).startDocument()
+ #if not isinstance(self.store, N3Store):
+ # return
+ #
+ #all_list = [self.label(var) for var in
+ # self.store.get_universals(recurse=False)]
+ #all_list.sort()
+ #some_list = [self.label(var) for var in
+ # self.store.get_existentials(recurse=False)]
+ #some_list.sort()
+ #
+ #for var in all_list:
+ # self.write('\n'+self.indent()+'@forAll %s. '%var)
+ #for var in some_list:
+ # self.write('\n'+self.indent()+'@forSome %s. '%var)
+ #
+ #if (len(all_list) + len(some_list)) > 0:
+ # self.write('\n')
+
+ def endDocument(self):
+ if not self.parent:
+ super(N3Serializer, self).endDocument()
+
+ def indent(self, modifier=0):
+ indent = super(N3Serializer, self).indent(modifier)
+ if self.parent is not None:
+ indent += self.parent.indent()#modifier)
+ return indent
+
+ def preprocessTriple(self, triple):
+ super(N3Serializer, self).preprocessTriple(triple)
+ if isinstance(triple[0], Graph):
+ for t in triple[0]:
+ self.preprocessTriple(t)
+ if isinstance(triple[2], Graph):
+ for t in triple[2]:
+ self.preprocessTriple(t)
+
+ def getQName(self, uri, gen_prefix=True):
+ qname = None
+ if self.parent is not None:
+ qname = self.parent.getQName(uri, gen_prefix)
+ if qname is None:
+ qname = super(N3Serializer, self).getQName(uri, gen_prefix)
+ return qname
+
+ def statement(self, subject):
+ self.subjectDone(subject)
+ properties = self.buildPredicateHash(subject)
+ if len(properties) == 0:
+ return False
+ return (self.s_clause(subject)
+ or super(N3Serializer, self).statement(subject))
+
+ def path(self, node, position, newline=False):
+ if not self.p_clause(node, position):
+ super(N3Serializer, self).path(node, position, newline)
+
+ def s_clause(self, subject):
+ if isinstance(subject, Graph):
+ self.write('\n'+self.indent())
+ self.p_clause(subject, SUBJECT)
+ self.predicateList(subject)
+ self.write(' .')
+ return True
+ else:
+ return False
+
+ def p_clause(self, node, position):
+ if isinstance(node, Graph):
+ self.subjectDone(node)
+ if position is OBJECT:
+ self.write(' ')
+ self.write('{')
+ self.depth += 1
+ serializer = N3Serializer(node, parent=self)
+ serializer.serialize(self.stream)
+ self.depth -= 1
+ self.write(self.indent()+'}')
+ return True
+ else:
+ return False
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py
new file mode 100644
index 0000000..ff0eed0
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py
@@ -0,0 +1,34 @@
+import warnings
+
+from rdflib_.serializer import Serializer
+from rdflib_.py3compat import b
+
+from rdflib_.plugins.serializers.nt import _xmlcharref_encode
+
+__all__ = ['NQuadsSerializer']
+
+class NQuadsSerializer(Serializer):
+
+ def __init__(self, store):
+ if not store.context_aware:
+ raise Exception("NQuads serialization only makes sense for context-aware stores!")
+
+ super(NQuadsSerializer, self).__init__(store)
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ if base is not None:
+ warnings.warn("NQuadsSerializer does not support base.")
+ if encoding is not None:
+ warnings.warn("NQuadsSerializer does not use custom encoding.")
+ encoding = self.encoding
+ for context in self.store.contexts():
+ for triple in context:
+ stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace"))
+ stream.write(b("\n"))
+
+def _nq_row(triple,context):
+ return u"%s %s %s %s .\n" % (triple[0].n3(),
+ triple[1].n3(),
+ _xmlcharref_encode(triple[2].n3()),
+ context.n3())
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~
new file mode 100644
index 0000000..29e0dff
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~
@@ -0,0 +1,34 @@
+import warnings
+
+from rdflib.serializer import Serializer
+from rdflib.py3compat import b
+
+from rdflib.plugins.serializers.nt import _xmlcharref_encode
+
+__all__ = ['NQuadsSerializer']
+
+class NQuadsSerializer(Serializer):
+
+ def __init__(self, store):
+ if not store.context_aware:
+ raise Exception("NQuads serialization only makes sense for context-aware stores!")
+
+ super(NQuadsSerializer, self).__init__(store)
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ if base is not None:
+ warnings.warn("NQuadsSerializer does not support base.")
+ if encoding is not None:
+ warnings.warn("NQuadsSerializer does not use custom encoding.")
+ encoding = self.encoding
+ for context in self.store.contexts():
+ for triple in context:
+ stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace"))
+ stream.write(b("\n"))
+
+def _nq_row(triple,context):
+ return u"%s %s %s %s .\n" % (triple[0].n3(),
+ triple[1].n3(),
+ _xmlcharref_encode(triple[2].n3()),
+ context.n3())
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py
new file mode 100644
index 0000000..35a9d53
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py
@@ -0,0 +1,76 @@
+"""
+N-Triples RDF graph serializer for RDFLib.
+See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
+format.
+"""
+from rdflib_.serializer import Serializer
+from rdflib_.py3compat import b
+import warnings
+
+__all__ = ['NTSerializer']
+
+class NTSerializer(Serializer):
+ """
+ Serializes RDF graphs to NTriples format.
+ """
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ if base is not None:
+ warnings.warn("NTSerializer does not support base.")
+ if encoding is not None:
+ warnings.warn("NTSerializer does not use custom encoding.")
+ encoding = self.encoding
+ for triple in self.store:
+ stream.write(_nt_row(triple).encode(encoding, "replace"))
+ stream.write(b("\n"))
+
+
+def _nt_row(triple):
+ return u"%s %s %s .\n" % (triple[0].n3(),
+ triple[1].n3(),
+ _xmlcharref_encode(triple[2].n3()))
+
+# from <http://code.activestate.com/recipes/303668/>
+def _xmlcharref_encode(unicode_data, encoding="ascii"):
+ """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
+ chars = []
+
+ # nothing to do about xmlchars, but replace newlines with escapes:
+ unicode_data=unicode_data.replace("\n","\\n")
+ if unicode_data.startswith('"""'):
+ # Updated with Bernhard Schandl's patch...
+ # unicode_data = unicode_data.replace('"""', '"') # original
+
+ last_triplequote_pos = unicode_data.rfind('"""')
+ payload = unicode_data[3:last_triplequote_pos]
+ trail = unicode_data[last_triplequote_pos+3:]
+
+ # fix three-quotes encoding
+ payload = payload.replace('\\"""', '"""')
+
+ # corner case: if string ends with " it is already encoded.
+ # so we need to de-escape it before it will be re-escaped in the next step.
+ if payload.endswith('\\"'):
+ payload = payload.replace('\\"', '"')
+
+ # escape quotes in payload
+ payload = payload.replace('"', '\\"')
+
+ # reconstruct result using single quotes
+ unicode_data = '"%s"%s' % (payload, trail)
+
+ # Step through the unicode_data string one character at a time in
+ # order to catch unencodable characters:
+ for char in unicode_data:
+ try:
+ char.encode(encoding, 'strict')
+ except UnicodeError:
+ if ord(char) <= 0xFFFF:
+ chars.append('\\u%04X' % ord(char))
+ else:
+ chars.append('\\U%08X' % ord(char))
+ else:
+ chars.append(char)
+
+ return ''.join(chars)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py~
new file mode 100644
index 0000000..bbbe720
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nt.py~
@@ -0,0 +1,76 @@
+"""
+N-Triples RDF graph serializer for RDFLib.
+See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
+format.
+"""
+from rdflib.serializer import Serializer
+from rdflib.py3compat import b
+import warnings
+
+__all__ = ['NTSerializer']
+
+class NTSerializer(Serializer):
+ """
+ Serializes RDF graphs to NTriples format.
+ """
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ if base is not None:
+ warnings.warn("NTSerializer does not support base.")
+ if encoding is not None:
+ warnings.warn("NTSerializer does not use custom encoding.")
+ encoding = self.encoding
+ for triple in self.store:
+ stream.write(_nt_row(triple).encode(encoding, "replace"))
+ stream.write(b("\n"))
+
+
+def _nt_row(triple):
+ return u"%s %s %s .\n" % (triple[0].n3(),
+ triple[1].n3(),
+ _xmlcharref_encode(triple[2].n3()))
+
+# from <http://code.activestate.com/recipes/303668/>
+def _xmlcharref_encode(unicode_data, encoding="ascii"):
+ """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
+ chars = []
+
+ # nothing to do about xmlchars, but replace newlines with escapes:
+ unicode_data=unicode_data.replace("\n","\\n")
+ if unicode_data.startswith('"""'):
+ # Updated with Bernhard Schandl's patch...
+ # unicode_data = unicode_data.replace('"""', '"') # original
+
+ last_triplequote_pos = unicode_data.rfind('"""')
+ payload = unicode_data[3:last_triplequote_pos]
+ trail = unicode_data[last_triplequote_pos+3:]
+
+ # fix three-quotes encoding
+ payload = payload.replace('\\"""', '"""')
+
+ # corner case: if string ends with " it is already encoded.
+ # so we need to de-escape it before it will be re-escaped in the next step.
+ if payload.endswith('\\"'):
+ payload = payload.replace('\\"', '"')
+
+ # escape quotes in payload
+ payload = payload.replace('"', '\\"')
+
+ # reconstruct result using single quotes
+ unicode_data = '"%s"%s' % (payload, trail)
+
+ # Step through the unicode_data string one character at a time in
+ # order to catch unencodable characters:
+ for char in unicode_data:
+ try:
+ char.encode(encoding, 'strict')
+ except UnicodeError:
+ if ord(char) <= 0xFFFF:
+ chars.append('\\u%04X' % ord(char))
+ else:
+ chars.append('\\U%08X' % ord(char))
+ else:
+ chars.append(char)
+
+ return ''.join(chars)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py
new file mode 100644
index 0000000..002f3e5
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py
@@ -0,0 +1,282 @@
+from __future__ import generators
+
+from rdflib_.plugins.serializers.xmlwriter import XMLWriter
+
+from rdflib_.namespace import Namespace, RDF, RDFS, split_uri
+
+from rdflib_.term import URIRef, Literal, BNode
+from rdflib_.util import first, more_than
+from rdflib_.collection import Collection
+from rdflib_.serializer import Serializer
+
+from rdflib_.exceptions import Error
+
+from rdflib_.py3compat import b
+
+from xml.sax.saxutils import quoteattr, escape
+
+__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']
+
+class XMLSerializer(Serializer):
+
+ def __init__(self, store):
+ super(XMLSerializer, self).__init__(store)
+
+ def __bindings(self):
+ store = self.store
+ nm = store.namespace_manager
+ bindings = {}
+ for predicate in set(store.predicates()):
+ prefix, namespace, name = nm.compute_qname(predicate)
+ bindings[prefix] = URIRef(namespace)
+ RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+ if "rdf" in bindings:
+ assert bindings["rdf"]==RDFNS
+ else:
+ bindings["rdf"] = RDFNS
+ for prefix, namespace in bindings.iteritems():
+ yield prefix, namespace
+
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ self.base = base
+ self.__stream = stream
+ self.__serialized = {}
+ encoding = self.encoding
+ self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace'))
+
+ # startDocument
+ write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)
+
+ # startRDF
+ write('<rdf:RDF\n')
+ # If provided, write xml:base attribute for the RDF
+ if "xml_base" in args:
+ write(' xml:base="%s"\n' % args['xml_base'])
+ # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
+ bindings = list(self.__bindings())
+ bindings.sort()
+ for prefix, namespace in bindings:
+ if prefix:
+ write(' xmlns:%s="%s"\n' % (prefix, namespace))
+ else:
+ write(' xmlns="%s"\n' % namespace)
+ write('>\n')
+
+ # write out triples by subject
+ for subject in self.store.subjects():
+ self.subject(subject, 1)
+
+ # endRDF
+ write( "</rdf:RDF>\n" )
+
+ # Set to None so that the memory can get garbage collected.
+ #self.__serialized = None
+ del self.__serialized
+
+
+ def subject(self, subject, depth=1):
+ if not subject in self.__serialized:
+ self.__serialized[subject] = 1
+ if isinstance(subject, (BNode,URIRef)):
+ write = self.write
+ indent = " " * depth
+ element_name = "rdf:Description"
+ if isinstance(subject, BNode):
+ write( '%s<%s rdf:nodeID="%s"' %
+ (indent, element_name, subject))
+ else:
+ uri = quoteattr(self.relativize(subject))
+ write( "%s<%s rdf:about=%s" % (indent, element_name, uri))
+ if (subject, None, None) in self.store:
+ write( ">\n" )
+ for predicate, object in self.store.predicate_objects(subject):
+ self.predicate(predicate, object, depth+1)
+ write( "%s</%s>\n" % (indent, element_name))
+ else:
+ write( "/>\n" )
+
+ def predicate(self, predicate, object, depth=1):
+ write = self.write
+ indent = " " * depth
+ qname = self.store.namespace_manager.qname(predicate)
+ if isinstance(object, Literal):
+ attributes = ""
+ if object.language:
+ attributes += ' xml:lang="%s"'%object.language
+
+ if object.datatype:
+ attributes += ' rdf:datatype="%s"'%object.datatype
+
+ write("%s<%s%s>%s</%s>\n" %
+ (indent, qname, attributes,
+ escape(object), qname) )
+ else:
+ if isinstance(object, BNode):
+ write('%s<%s rdf:nodeID="%s"/>\n' %
+ (indent, qname, object))
+ else:
+ write("%s<%s rdf:resource=%s/>\n" %
+ (indent, qname, quoteattr(self.relativize(object))))
+
+
+
+XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
+XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
+OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')
+
+# TODO:
+def fix(val):
+ "strip off _: from nodeIDs... as they are not valid NCNames"
+ if val.startswith("_:"):
+ return val[2:]
+ else:
+ return val
+
+
+class PrettyXMLSerializer(Serializer):
+
+ def __init__(self, store, max_depth=3):
+ super(PrettyXMLSerializer, self).__init__(store)
+ self.forceRDFAbout=set()
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ self.__serialized = {}
+ store = self.store
+ self.base = base
+ self.max_depth = args.get("max_depth", 3)
+ assert self.max_depth>0, "max_depth must be greater than 0"
+
+ self.nm = nm = store.namespace_manager
+ self.writer = writer = XMLWriter(stream, nm, encoding)
+
+ namespaces = {}
+ possible = set(store.predicates()).union(store.objects(None, RDF.type))
+ for predicate in possible:
+ prefix, namespace, local = nm.compute_qname(predicate)
+ namespaces[prefix] = namespace
+ namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ writer.push(RDF.RDF)
+ if "xml_base" in args:
+ writer.attribute(XMLBASE, args["xml_base"])
+ writer.namespaces(namespaces.iteritems())
+
+ # Write out subjects that cannot be inlined
+ for subject in store.subjects():
+ if (None, None, subject) in store:
+ if (subject, None, subject) in store:
+ self.subject(subject, 1)
+ else:
+ self.subject(subject, 1)
+
+ # write out anything that has not yet been reached
+ # write out BNodes last (to ensure they can be inlined where possible)
+ bnodes=set()
+ for subject in store.subjects():
+ if isinstance(subject,BNode):
+ bnodes.add(subject)
+ continue
+ self.subject(subject, 1)
+ #now serialize only those BNodes that have not been serialized yet
+ for bnode in bnodes:
+ if bnode not in self.__serialized:
+ self.subject(subject, 1)
+ writer.pop(RDF.RDF)
+ stream.write(b("\n"))
+
+ # Set to None so that the memory can get garbage collected.
+ self.__serialized = None
+
+
+
+ def subject(self, subject, depth=1):
+ store = self.store
+ writer = self.writer
+ if subject in self.forceRDFAbout:
+ writer.push(RDF.Description)
+ writer.attribute(RDF.about, self.relativize(subject))
+ writer.pop(RDF.Description)
+ self.forceRDFAbout.remove(subject)
+ elif not subject in self.__serialized:
+ self.__serialized[subject] = 1
+ type = first(store.objects(subject, RDF.type))
+ try:
+ self.nm.qname(type)
+ except:
+ type = None
+ element = type or RDF.Description
+ writer.push(element)
+ if isinstance(subject, BNode):
+ def subj_as_obj_more_than(ceil):
+ return True
+ # more_than(store.triples((None, None, subject)), ceil)
+
+ #here we only include BNode labels if they are referenced
+ #more than once (this reduces the use of redundant BNode identifiers)
+ if subj_as_obj_more_than(1):
+ writer.attribute(RDF.nodeID, fix(subject))
+ else:
+ writer.attribute(RDF.about, self.relativize(subject))
+ if (subject, None, None) in store:
+ for predicate, object in store.predicate_objects(subject):
+ if not (predicate==RDF.type and object==type):
+ self.predicate(predicate, object, depth+1)
+ writer.pop(element)
+ elif subject in self.forceRDFAbout:
+ writer.push(RDF.Description)
+ writer.attribute(RDF.about, self.relativize(subject))
+ writer.pop(RDF.Description)
+ self.forceRDFAbout.remove(subject)
+
+ def predicate(self, predicate, object, depth=1):
+ writer = self.writer
+ store = self.store
+ writer.push(predicate)
+ if isinstance(object, Literal):
+ attributes = ""
+ if object.language:
+ writer.attribute(XMLLANG, object.language)
+ if object.datatype:
+ writer.attribute(RDF.datatype, object.datatype)
+ writer.text(object)
+ elif object in self.__serialized or not (object, None, None) in store:
+ if isinstance(object, BNode):
+ if more_than(store.triples((None, None, object)), 0):
+ writer.attribute(RDF.nodeID, fix(object))
+ else:
+ writer.attribute(RDF.resource, self.relativize(object))
+ else:
+ if first(store.objects(object, RDF.first)): # may not have type RDF.List
+ collection = object
+ self.__serialized[object] = 1
+ # TODO: warn that any assertions on object other than
+ # RDF.first and RDF.rest are ignored... including RDF.List
+ writer.attribute(RDF.parseType, "Collection")
+ col=Collection(store,object)
+ for item in col:
+ if isinstance(item,URIRef):
+ self.forceRDFAbout.add(item)
+ self.subject(item)
+ if not isinstance(item,URIRef):
+ self.__serialized[item] = 1
+ else:
+ if first(store.triples_choices((object,
+ RDF.type,
+ [OWL_NS.Class,RDFS.Class]))) and\
+ isinstance(object, URIRef):
+ writer.attribute(RDF.resource, self.relativize(object))
+ elif depth<=self.max_depth:
+ self.subject(object, depth+1)
+ elif isinstance(object, BNode):
+ if not object in self.__serialized and \
+ (object, None, None) in store and \
+ len(list(store.subjects(object=object)))==1:
+ #inline blank nodes if they haven't been serialized yet and are
+ #only referenced once (regardless of depth)
+ self.subject(object, depth+1)
+ else:
+ writer.attribute(RDF.nodeID, fix(object))
+ else:
+ writer.attribute(RDF.resource, self.relativize(object))
+ writer.pop(predicate)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py~
new file mode 100644
index 0000000..d72c27e
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py~
@@ -0,0 +1,282 @@
+from __future__ import generators
+
+from rdflib.plugins.serializers.xmlwriter import XMLWriter
+
+from rdflib.namespace import Namespace, RDF, RDFS, split_uri
+
+from rdflib.term import URIRef, Literal, BNode
+from rdflib.util import first, more_than
+from rdflib.collection import Collection
+from rdflib.serializer import Serializer
+
+from rdflib.exceptions import Error
+
+from rdflib.py3compat import b
+
+from xml.sax.saxutils import quoteattr, escape
+
+__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']
+
+class XMLSerializer(Serializer):
+
+ def __init__(self, store):
+ super(XMLSerializer, self).__init__(store)
+
+ def __bindings(self):
+ store = self.store
+ nm = store.namespace_manager
+ bindings = {}
+ for predicate in set(store.predicates()):
+ prefix, namespace, name = nm.compute_qname(predicate)
+ bindings[prefix] = URIRef(namespace)
+ RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+ if "rdf" in bindings:
+ assert bindings["rdf"]==RDFNS
+ else:
+ bindings["rdf"] = RDFNS
+ for prefix, namespace in bindings.iteritems():
+ yield prefix, namespace
+
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ self.base = base
+ self.__stream = stream
+ self.__serialized = {}
+ encoding = self.encoding
+ self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace'))
+
+ # startDocument
+ write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)
+
+ # startRDF
+ write('<rdf:RDF\n')
+ # If provided, write xml:base attribute for the RDF
+ if "xml_base" in args:
+ write(' xml:base="%s"\n' % args['xml_base'])
+ # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
+ bindings = list(self.__bindings())
+ bindings.sort()
+ for prefix, namespace in bindings:
+ if prefix:
+ write(' xmlns:%s="%s"\n' % (prefix, namespace))
+ else:
+ write(' xmlns="%s"\n' % namespace)
+ write('>\n')
+
+ # write out triples by subject
+ for subject in self.store.subjects():
+ self.subject(subject, 1)
+
+ # endRDF
+ write( "</rdf:RDF>\n" )
+
+ # Set to None so that the memory can get garbage collected.
+ #self.__serialized = None
+ del self.__serialized
+
+
+ def subject(self, subject, depth=1):
+ if not subject in self.__serialized:
+ self.__serialized[subject] = 1
+ if isinstance(subject, (BNode,URIRef)):
+ write = self.write
+ indent = " " * depth
+ element_name = "rdf:Description"
+ if isinstance(subject, BNode):
+ write( '%s<%s rdf:nodeID="%s"' %
+ (indent, element_name, subject))
+ else:
+ uri = quoteattr(self.relativize(subject))
+ write( "%s<%s rdf:about=%s" % (indent, element_name, uri))
+ if (subject, None, None) in self.store:
+ write( ">\n" )
+ for predicate, object in self.store.predicate_objects(subject):
+ self.predicate(predicate, object, depth+1)
+ write( "%s</%s>\n" % (indent, element_name))
+ else:
+ write( "/>\n" )
+
+ def predicate(self, predicate, object, depth=1):
+ write = self.write
+ indent = " " * depth
+ qname = self.store.namespace_manager.qname(predicate)
+ if isinstance(object, Literal):
+ attributes = ""
+ if object.language:
+ attributes += ' xml:lang="%s"'%object.language
+
+ if object.datatype:
+ attributes += ' rdf:datatype="%s"'%object.datatype
+
+ write("%s<%s%s>%s</%s>\n" %
+ (indent, qname, attributes,
+ escape(object), qname) )
+ else:
+ if isinstance(object, BNode):
+ write('%s<%s rdf:nodeID="%s"/>\n' %
+ (indent, qname, object))
+ else:
+ write("%s<%s rdf:resource=%s/>\n" %
+ (indent, qname, quoteattr(self.relativize(object))))
+
+
+
+XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
+XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
+OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')
+
+# TODO:
+def fix(val):
+ "strip off _: from nodeIDs... as they are not valid NCNames"
+ if val.startswith("_:"):
+ return val[2:]
+ else:
+ return val
+
+
+class PrettyXMLSerializer(Serializer):
+
+ def __init__(self, store, max_depth=3):
+ super(PrettyXMLSerializer, self).__init__(store)
+ self.forceRDFAbout=set()
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+ self.__serialized = {}
+ store = self.store
+ self.base = base
+ self.max_depth = args.get("max_depth", 3)
+ assert self.max_depth>0, "max_depth must be greater than 0"
+
+ self.nm = nm = store.namespace_manager
+ self.writer = writer = XMLWriter(stream, nm, encoding)
+
+ namespaces = {}
+ possible = set(store.predicates()).union(store.objects(None, RDF.type))
+ for predicate in possible:
+ prefix, namespace, local = nm.compute_qname(predicate)
+ namespaces[prefix] = namespace
+ namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ writer.push(RDF.RDF)
+ if "xml_base" in args:
+ writer.attribute(XMLBASE, args["xml_base"])
+ writer.namespaces(namespaces.iteritems())
+
+ # Write out subjects that cannot be inlined
+ for subject in store.subjects():
+ if (None, None, subject) in store:
+ if (subject, None, subject) in store:
+ self.subject(subject, 1)
+ else:
+ self.subject(subject, 1)
+
+ # write out anything that has not yet been reached
+ # write out BNodes last (to ensure they can be inlined where possible)
+ bnodes=set()
+ for subject in store.subjects():
+ if isinstance(subject,BNode):
+ bnodes.add(subject)
+ continue
+ self.subject(subject, 1)
+ #now serialize only those BNodes that have not been serialized yet
+ for bnode in bnodes:
+ if bnode not in self.__serialized:
+ self.subject(subject, 1)
+ writer.pop(RDF.RDF)
+ stream.write(b("\n"))
+
+ # Set to None so that the memory can get garbage collected.
+ self.__serialized = None
+
+
+
+ def subject(self, subject, depth=1):
+ store = self.store
+ writer = self.writer
+ if subject in self.forceRDFAbout:
+ writer.push(RDF.Description)
+ writer.attribute(RDF.about, self.relativize(subject))
+ writer.pop(RDF.Description)
+ self.forceRDFAbout.remove(subject)
+ elif not subject in self.__serialized:
+ self.__serialized[subject] = 1
+ type = first(store.objects(subject, RDF.type))
+ try:
+ self.nm.qname(type)
+ except:
+ type = None
+ element = type or RDF.Description
+ writer.push(element)
+ if isinstance(subject, BNode):
+ def subj_as_obj_more_than(ceil):
+ return True
+ # more_than(store.triples((None, None, subject)), ceil)
+
+ #here we only include BNode labels if they are referenced
+ #more than once (this reduces the use of redundant BNode identifiers)
+ if subj_as_obj_more_than(1):
+ writer.attribute(RDF.nodeID, fix(subject))
+ else:
+ writer.attribute(RDF.about, self.relativize(subject))
+ if (subject, None, None) in store:
+ for predicate, object in store.predicate_objects(subject):
+ if not (predicate==RDF.type and object==type):
+ self.predicate(predicate, object, depth+1)
+ writer.pop(element)
+ elif subject in self.forceRDFAbout:
+ writer.push(RDF.Description)
+ writer.attribute(RDF.about, self.relativize(subject))
+ writer.pop(RDF.Description)
+ self.forceRDFAbout.remove(subject)
+
+ def predicate(self, predicate, object, depth=1):
+ writer = self.writer
+ store = self.store
+ writer.push(predicate)
+ if isinstance(object, Literal):
+ attributes = ""
+ if object.language:
+ writer.attribute(XMLLANG, object.language)
+ if object.datatype:
+ writer.attribute(RDF.datatype, object.datatype)
+ writer.text(object)
+ elif object in self.__serialized or not (object, None, None) in store:
+ if isinstance(object, BNode):
+ if more_than(store.triples((None, None, object)), 0):
+ writer.attribute(RDF.nodeID, fix(object))
+ else:
+ writer.attribute(RDF.resource, self.relativize(object))
+ else:
+ if first(store.objects(object, RDF.first)): # may not have type RDF.List
+ collection = object
+ self.__serialized[object] = 1
+ # TODO: warn that any assertions on object other than
+ # RDF.first and RDF.rest are ignored... including RDF.List
+ writer.attribute(RDF.parseType, "Collection")
+ col=Collection(store,object)
+ for item in col:
+ if isinstance(item,URIRef):
+ self.forceRDFAbout.add(item)
+ self.subject(item)
+ if not isinstance(item,URIRef):
+ self.__serialized[item] = 1
+ else:
+ if first(store.triples_choices((object,
+ RDF.type,
+ [OWL_NS.Class,RDFS.Class]))) and\
+ isinstance(object, URIRef):
+ writer.attribute(RDF.resource, self.relativize(object))
+ elif depth<=self.max_depth:
+ self.subject(object, depth+1)
+ elif isinstance(object, BNode):
+ if not object in self.__serialized and \
+ (object, None, None) in store and \
+ len(list(store.subjects(object=object)))==1:
+ #inline blank nodes if they haven't been serialized yet and are
+ #only referenced once (regardless of depth)
+ self.subject(object, depth+1)
+ else:
+ writer.attribute(RDF.nodeID, fix(object))
+ else:
+ writer.attribute(RDF.resource, self.relativize(object))
+ writer.pop(predicate)
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py
new file mode 100644
index 0000000..c89d049
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py
@@ -0,0 +1,72 @@
+from rdflib_.serializer import Serializer
+from rdflib_.plugins.serializers.xmlwriter import XMLWriter
+
+from rdflib_.term import URIRef, Literal, BNode
+from rdflib_.namespace import Namespace
+
+from rdflib_.graph import Graph, ConjunctiveGraph
+
+from rdflib_.py3compat import b
+
+__all__ = ['TriXSerializer']
+
+## TODO: Move this somewhere central
+TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
+XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
+
+class TriXSerializer(Serializer):
+ def __init__(self, store):
+ super(TriXSerializer, self).__init__(store)
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+
+ nm=self.store.namespace_manager
+
+ self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})
+
+ self.writer.push(TRIXNS[u"TriX"])
+ self.writer.namespaces()
+
+ if isinstance(self.store, ConjunctiveGraph):
+ for subgraph in self.store.contexts():
+ self._writeGraph(subgraph)
+ elif isinstance(self.store, Graph):
+ self._writeGraph(self.store)
+ else:
+ raise Exception("Unknown graph type: "+type(self.store))
+
+ self.writer.pop()
+ stream.write(b("\n"))
+
+
+ def _writeGraph(self, graph):
+ self.writer.push(TRIXNS[u"graph"])
+ if isinstance(graph.identifier, URIRef):
+ self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier))
+
+ for triple in graph.triples((None,None,None)):
+ self._writeTriple(triple)
+ self.writer.pop()
+
+ def _writeTriple(self, triple):
+ self.writer.push(TRIXNS[u"triple"])
+ for component in triple:
+ if isinstance(component, URIRef):
+ self.writer.element(TRIXNS[u"uri"],
+ content=unicode(component))
+ elif isinstance(component, BNode):
+ self.writer.element(TRIXNS[u"id"],
+ content=unicode(component))
+ elif isinstance(component, Literal):
+ if component.datatype:
+ self.writer.element(TRIXNS[u"typedLiteral"],
+ content=unicode(component),
+ attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) })
+ elif component.language:
+ self.writer.element(TRIXNS[u"plainLiteral"],
+ content=unicode(component),
+ attributes={ XMLNS[u"lang"]: unicode(component.language) })
+ else:
+ self.writer.element(TRIXNS[u"plainLiteral"],
+ content=unicode(component))
+ self.writer.pop()
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~
new file mode 100644
index 0000000..c7115c0
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~
@@ -0,0 +1,72 @@
+from rdflib.serializer import Serializer
+from rdflib.plugins.serializers.xmlwriter import XMLWriter
+
+from rdflib.term import URIRef, Literal, BNode
+from rdflib.namespace import Namespace
+
+from rdflib.graph import Graph, ConjunctiveGraph
+
+from rdflib.py3compat import b
+
+__all__ = ['TriXSerializer']
+
+## TODO: Move this somewhere central
+TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
+XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
+
+class TriXSerializer(Serializer):
+ def __init__(self, store):
+ super(TriXSerializer, self).__init__(store)
+
+ def serialize(self, stream, base=None, encoding=None, **args):
+
+ nm=self.store.namespace_manager
+
+ self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})
+
+ self.writer.push(TRIXNS[u"TriX"])
+ self.writer.namespaces()
+
+ if isinstance(self.store, ConjunctiveGraph):
+ for subgraph in self.store.contexts():
+ self._writeGraph(subgraph)
+ elif isinstance(self.store, Graph):
+ self._writeGraph(self.store)
+ else:
+ raise Exception("Unknown graph type: "+type(self.store))
+
+ self.writer.pop()
+ stream.write(b("\n"))
+
+
+ def _writeGraph(self, graph):
+ self.writer.push(TRIXNS[u"graph"])
+ if isinstance(graph.identifier, URIRef):
+ self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier))
+
+ for triple in graph.triples((None,None,None)):
+ self._writeTriple(triple)
+ self.writer.pop()
+
+ def _writeTriple(self, triple):
+ self.writer.push(TRIXNS[u"triple"])
+ for component in triple:
+ if isinstance(component, URIRef):
+ self.writer.element(TRIXNS[u"uri"],
+ content=unicode(component))
+ elif isinstance(component, BNode):
+ self.writer.element(TRIXNS[u"id"],
+ content=unicode(component))
+ elif isinstance(component, Literal):
+ if component.datatype:
+ self.writer.element(TRIXNS[u"typedLiteral"],
+ content=unicode(component),
+ attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) })
+ elif component.language:
+ self.writer.element(TRIXNS[u"plainLiteral"],
+ content=unicode(component),
+ attributes={ XMLNS[u"lang"]: unicode(component.language) })
+ else:
+ self.writer.element(TRIXNS[u"plainLiteral"],
+ content=unicode(component))
+ self.writer.pop()
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py
new file mode 100644
index 0000000..bba4ed9
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py
@@ -0,0 +1,364 @@
+"""
+Turtle RDF graph serializer for RDFLib.
+See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
+"""
+from rdflib_.term import BNode, Literal, URIRef
+
+from rdflib_.exceptions import Error
+
+from rdflib_.serializer import Serializer
+
+from rdflib_.namespace import RDF, RDFS
+
+__all__ = ['RecursiveSerializer', 'TurtleSerializer']
+
class RecursiveSerializer(Serializer):
    """Base class for serializers that write subjects recursively.

    Tracks which subjects are already serialized, how often each node
    occurs in the object position, and the current output depth.
    """

    # Subjects typed as one of these classes are serialized first.
    topClasses = [RDFS.Class]
    # Preferred ordering of predicates within a subject's property list.
    predicateOrder = [RDF.type, RDFS.label]
    # Recursion limit when descending into URIRef subjects.
    maxDepth = 10
    indentString = u"  "

    def __init__(self, store):

        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        # Remember a prefix binding for later emission in the header.
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Check to see if the subject should be serialized yet"""
        if ((self.isDone(subject))
            or (subject not in self._subjects)
            or ((subject in self._topLevels) and (self.depth > 1))
            or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth))
            ):
            return False
        return True

    def isDone(self, subject):
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self):
        """Return all subjects: top-class members first, then the rest
        sorted so URIRefs precede BNodes and low ref-counts come first."""
        seen = {}
        subjects = []

        for classURI in self.topClasses:
            members = list(self.store.subjects(RDF.type, classURI))
            members.sort()

            for member in members:
                subjects.append(member)
                self._topLevels[member] = True
                seen[member] = True

        recursable = [(isinstance(subject,BNode), self.refCount(subject), subject) for subject in self._subjects
                      if subject not in seen]

        recursable.sort()
        subjects.extend([subject for (isbnode, refs, subject) in recursable])

        return subjects

    def preprocess(self):
        # Single pass over the store to collect subjects and ref-counts.
        for triple in self.store.triples((None,None,None)):
            self.preprocessTriple(triple)

    # NOTE: Python 2 tuple-unpacking parameter syntax.
    def preprocessTriple(self, (s,p,o)):
        # Count object-position occurrences and record s as a subject.
        references = self.refCount(o) + 1
        self._references[o] = references
        self._subjects[s] = True

    def refCount(self, node):
        """Return the number of times this node has been referenced in the object position"""
        return self._references.get(node, 0)

    def reset(self):
        # Clear all per-serialization state.
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = {}
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

    def buildPredicateHash(self, subject):
        """Build a hash key by predicate to a list of objects for the given subject"""
        properties = {}
        for s,p,o in self.store.triples((subject, None, None)):
            oList = properties.get(p, [])
            oList.append(o)
            properties[p] = oList
        return properties

    def sortProperties(self, properties):
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values.  Return a sorted list of properties."""
        # Sort object lists
        for prop, objects in properties.items():
            objects.sort()

        # Make sorted list of properties
        propList = []
        seen = {}
        for prop in self.predicateOrder:
            if (prop in properties) and (prop not in seen):
                propList.append(prop)
                seen[prop] = True
        # NOTE: Python 2 — keys() returns a list, sortable in place.
        props = properties.keys()
        props.sort()
        for prop in props:
            if prop not in seen:
                propList.append(prop)
                seen[prop] = True
        return propList

    def subjectDone(self, subject):
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Returns indent string multiplied by the depth"""
        return (self.depth+modifier)*self.indentString

    def write(self, text):
        """Write text in given encoding."""
        self.stream.write(text.encode(self.encoding, 'replace'))
+
+
# Positions within a triple, passed to path()/label() to pick the
# rendering rules (e.g. keyword abbreviation only applies at VERB).
SUBJECT = 0
VERB = 1
OBJECT = 2

# Module-level feature toggles:
# generate new namespace prefixes when shortening datatype URIs
_GEN_QNAME_FOR_DT = False
# separate top-level statements with blank lines
_SPACIOUS_OUTPUT = False
+
+
class TurtleSerializer(RecursiveSerializer):
    """Serialize a Graph to Turtle
    (http://www.w3.org/TeamSubmission/turtle/)."""

    short_name = "turtle"
    indentString = '  '

    def __init__(self, store):
        super(TurtleSerializer, self).__init__(store)
        # Predicates rendered as bare keywords (rdf:type -> 'a').
        self.keywords = {
            RDF.type: 'a'
        }
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    def reset(self):
        super(TurtleSerializer, self).reset()
        self._shortNames = {}
        self._started = False

    def serialize(self, stream, base=None, encoding=None, spacious=None, **args):
        """Write the store to ``stream`` as Turtle.

        :param spacious: when not None, overrides the module default for
            inserting blank lines between statements.
        """
        self.reset()
        self.stream = stream
        self.base = base

        if spacious is not None:
            self._spacious = spacious
        # In newer rdflibs these are always in the namespace manager
        #self.store.prefix_mapping('rdf', RDFNS)
        #self.store.prefix_mapping('rdfs', RDFSNS)

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        firstTime = True
        for subject in subjects_list:
            if self.isDone(subject):
                continue
            if firstTime:
                firstTime = False
            # NOTE(review): firstTime is cleared *before* this test, so
            # 'not firstTime' is always true here and a newline follows
            # every statement, including the first. The flag never
            # suppresses anything — kept as-is (matches upstream).
            if self.statement(subject) and not firstTime:
                self.write('\n')

        self.endDocument()
        stream.write(u"\n".encode('ascii'))

    def preprocessTriple(self, triple):
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i==VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):
            # Extra reference so p_squared() never inlines a BNode that
            # is used in the predicate position.
            self._references[p] = self.refCount(p) + 1

    def getQName(self, uri, gen_prefix=True):
        """Return a prefix:local form for ``uri``, or None when it cannot
        be shortened safely.  Registers the prefix as a side effect."""
        if not isinstance(uri, URIRef):
            return None

        parts=None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except:

            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)

            if pfx is not None:
                parts = (pfx, uri, '')
            else:
                # nothing worked
                return None

        prefix, namespace, local = parts
        # Local parts with '.' will mess up serialization
        if '.' in local:
            return None
        self.addNamespace(prefix, namespace)
        return u'%s:%s' % (prefix, local)

    def startDocument(self):
        """Emit the @prefix header for all collected namespaces."""
        self._started = True
        ns_list = sorted(self.namespaces.items())
        for prefix, uri in ns_list:
            self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri))
        if ns_list and self._spacious:
            self.write('\n')

    def endDocument(self):
        if self._spacious:
            self.write('\n')

    def statement(self, subject):
        """Serialize one top-level subject; returns True when written."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        # General statement form: 'subject predicates .'
        self.write('\n'+self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(' .')
        return True

    def s_squared(self, subject):
        # '[] predicates .' — only for unreferenced blank-node subjects.
        if (self.refCount(subject) > 0) or not isinstance(subject, BNode):
            return False
        self.write('\n'+self.indent()+'[]')
        #self.depth+=1
        self.predicateList(subject)
        #self.depth-=1
        self.write(' .')
        return True

    def path(self, node, position, newline=False):
        # Try the abbreviated ([...] / (...)) form first, then plain.
        if not (self.p_squared(node, position, newline)
                or self.p_default(node, position, newline)):
            raise Error("Cannot serialize node '%s'"%(node, ))

    def p_default(self, node, position, newline=False):
        if position != SUBJECT and not newline:
            self.write(' ')
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the Turtle token for a single node."""
        if node == RDF.nil:
            return '()'
        if position is VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(use_plain=True,
                                    qname_callback=lambda dt:
                                        self.getQName(dt, _GEN_QNAME_FOR_DT))
        else:
            return self.getQName(node, position==VERB) or node.n3()

    def p_squared(self, node, position, newline=False):
        # Inline a blank node as '[ ... ]' or a collection as '( ... )'.
        # Only safe for BNodes referenced at most once and never when the
        # node sits in the subject position.
        if (not isinstance(node, BNode)
            or node in self._serialized
            or self.refCount(node) > 1
            or position == SUBJECT):
            return False

        if not newline:
            self.write(' ')

        if self.isValidList(node):
            # this is a list
            self.write('(')
            self.depth += 1#2
            self.doList(node)
            self.depth -= 1#2
            self.write(' )')
        else:
            self.subjectDone(node)
            self.depth += 2
            #self.write('[\n' + self.indent())
            self.write('[')
            self.depth -= 1
            #self.predicateList(node, newline=True)
            self.predicateList(node, newline=False)
            #self.write('\n' + self.indent() + ']')
            self.write(' ]')
            self.depth -= 1

        return True

    def isValidList(self, l):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if not self.store.value(l, RDF.first):
                return False
        except:
            return False
        while l:
            # Each cons cell may carry exactly rdf:first and rdf:rest.
            if l != RDF.nil and len(
                list(self.store.predicate_objects(l))) != 2:
                return False
            l = self.store.value(l, RDF.rest)
        return True

    def doList(self,l):
        # Walk the cons cells, serializing each rdf:first value.
        while l:
            item = self.store.value(l, RDF.first)
            if item:
                self.path(item, OBJECT)
            self.subjectDone(l)
            l = self.store.value(l, RDF.rest)

    def predicateList(self, subject, newline=False):
        """Write 'pred obj, obj; pred obj ...' for all of subject's
        properties."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if len(propList) == 0:
            return
        self.verb(propList[0], newline=newline)
        self.objectList(properties[propList[0]])
        for predicate in propList[1:]:
            self.write(';\n' + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node, newline=False):
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write a comma-separated object list."""
        count = len(objects)
        if count == 0:
            return
        # Indent one level deeper only when several objects follow.
        depthmod = (count == 1) and 0 or 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(',\n' + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~
new file mode 100644
index 0000000..6878013
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~
@@ -0,0 +1,364 @@
+"""
+Turtle RDF graph serializer for RDFLib.
+See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
+"""
+from rdflib.term import BNode, Literal, URIRef
+
+from rdflib.exceptions import Error
+
+from rdflib.serializer import Serializer
+
+from rdflib.namespace import RDF, RDFS
+
+__all__ = ['RecursiveSerializer', 'TurtleSerializer']
+
+class RecursiveSerializer(Serializer):
+
+ topClasses = [RDFS.Class]
+ predicateOrder = [RDF.type, RDFS.label]
+ maxDepth = 10
+ indentString = u" "
+
+ def __init__(self, store):
+
+ super(RecursiveSerializer, self).__init__(store)
+ self.stream = None
+ self.reset()
+
+ def addNamespace(self, prefix, uri):
+ self.namespaces[prefix] = uri
+
+ def checkSubject(self, subject):
+ """Check to see if the subject should be serialized yet"""
+ if ((self.isDone(subject))
+ or (subject not in self._subjects)
+ or ((subject in self._topLevels) and (self.depth > 1))
+ or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth))
+ ):
+ return False
+ return True
+
+ def isDone(self, subject):
+ """Return true if subject is serialized"""
+ return subject in self._serialized
+
+ def orderSubjects(self):
+ seen = {}
+ subjects = []
+
+ for classURI in self.topClasses:
+ members = list(self.store.subjects(RDF.type, classURI))
+ members.sort()
+
+ for member in members:
+ subjects.append(member)
+ self._topLevels[member] = True
+ seen[member] = True
+
+ recursable = [(isinstance(subject,BNode), self.refCount(subject), subject) for subject in self._subjects
+ if subject not in seen]
+
+ recursable.sort()
+ subjects.extend([subject for (isbnode, refs, subject) in recursable])
+
+ return subjects
+
+ def preprocess(self):
+ for triple in self.store.triples((None,None,None)):
+ self.preprocessTriple(triple)
+
+ def preprocessTriple(self, (s,p,o)):
+ references = self.refCount(o) + 1
+ self._references[o] = references
+ self._subjects[s] = True
+
+ def refCount(self, node):
+ """Return the number of times this node has been referenced in the object position"""
+ return self._references.get(node, 0)
+
+ def reset(self):
+ self.depth = 0
+ self.lists = {}
+ self.namespaces = {}
+ self._references = {}
+ self._serialized = {}
+ self._subjects = {}
+ self._topLevels = {}
+
+ def buildPredicateHash(self, subject):
+ """Build a hash key by predicate to a list of objects for the given subject"""
+ properties = {}
+ for s,p,o in self.store.triples((subject, None, None)):
+ oList = properties.get(p, [])
+ oList.append(o)
+ properties[p] = oList
+ return properties
+
+ def sortProperties(self, properties):
+ """Take a hash from predicate uris to lists of values.
+ Sort the lists of values. Return a sorted list of properties."""
+ # Sort object lists
+ for prop, objects in properties.items():
+ objects.sort()
+
+ # Make sorted list of properties
+ propList = []
+ seen = {}
+ for prop in self.predicateOrder:
+ if (prop in properties) and (prop not in seen):
+ propList.append(prop)
+ seen[prop] = True
+ props = properties.keys()
+ props.sort()
+ for prop in props:
+ if prop not in seen:
+ propList.append(prop)
+ seen[prop] = True
+ return propList
+
+ def subjectDone(self, subject):
+ """Mark a subject as done."""
+ self._serialized[subject] = True
+
+ def indent(self, modifier=0):
+ """Returns indent string multiplied by the depth"""
+ return (self.depth+modifier)*self.indentString
+
+ def write(self, text):
+ """Write text in given encoding."""
+ self.stream.write(text.encode(self.encoding, 'replace'))
+
+
+SUBJECT = 0
+VERB = 1
+OBJECT = 2
+
+_GEN_QNAME_FOR_DT = False
+_SPACIOUS_OUTPUT = False
+
+
+class TurtleSerializer(RecursiveSerializer):
+
+ short_name = "turtle"
+ indentString = ' '
+
+ def __init__(self, store):
+ super(TurtleSerializer, self).__init__(store)
+ self.keywords = {
+ RDF.type: 'a'
+ }
+ self.reset()
+ self.stream = None
+ self._spacious = _SPACIOUS_OUTPUT
+
+ def reset(self):
+ super(TurtleSerializer, self).reset()
+ self._shortNames = {}
+ self._started = False
+
+ def serialize(self, stream, base=None, encoding=None, spacious=None, **args):
+ self.reset()
+ self.stream = stream
+ self.base = base
+
+ if spacious is not None:
+ self._spacious = spacious
+ # In newer rdflibs these are always in the namespace manager
+ #self.store.prefix_mapping('rdf', RDFNS)
+ #self.store.prefix_mapping('rdfs', RDFSNS)
+
+ self.preprocess()
+ subjects_list = self.orderSubjects()
+
+ self.startDocument()
+
+ firstTime = True
+ for subject in subjects_list:
+ if self.isDone(subject):
+ continue
+ if firstTime:
+ firstTime = False
+ if self.statement(subject) and not firstTime:
+ self.write('\n')
+
+ self.endDocument()
+ stream.write(u"\n".encode('ascii'))
+
+ def preprocessTriple(self, triple):
+ super(TurtleSerializer, self).preprocessTriple(triple)
+ for i, node in enumerate(triple):
+ if node in self.keywords:
+ continue
+ # Don't use generated prefixes for subjects and objects
+ self.getQName(node, gen_prefix=(i==VERB))
+ if isinstance(node, Literal) and node.datatype:
+ self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
+ p = triple[1]
+ if isinstance(p, BNode):
+ self._references[p] = self.refCount(p) + 1
+
+ def getQName(self, uri, gen_prefix=True):
+ if not isinstance(uri, URIRef):
+ return None
+
+ parts=None
+
+ try:
+ parts = self.store.compute_qname(uri, generate=gen_prefix)
+ except:
+
+ # is the uri a namespace in itself?
+ pfx = self.store.store.prefix(uri)
+
+ if pfx is not None:
+ parts = (pfx, uri, '')
+ else:
+ # nothing worked
+ return None
+
+ prefix, namespace, local = parts
+ # Local parts with '.' will mess up serialization
+ if '.' in local:
+ return None
+ self.addNamespace(prefix, namespace)
+ return u'%s:%s' % (prefix, local)
+
+ def startDocument(self):
+ self._started = True
+ ns_list = sorted(self.namespaces.items())
+ for prefix, uri in ns_list:
+ self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri))
+ if ns_list and self._spacious:
+ self.write('\n')
+
+ def endDocument(self):
+ if self._spacious:
+ self.write('\n')
+
+ def statement(self, subject):
+ self.subjectDone(subject)
+ return self.s_squared(subject) or self.s_default(subject)
+
+ def s_default(self, subject):
+ self.write('\n'+self.indent())
+ self.path(subject, SUBJECT)
+ self.predicateList(subject)
+ self.write(' .')
+ return True
+
+ def s_squared(self, subject):
+ if (self.refCount(subject) > 0) or not isinstance(subject, BNode):
+ return False
+ self.write('\n'+self.indent()+'[]')
+ #self.depth+=1
+ self.predicateList(subject)
+ #self.depth-=1
+ self.write(' .')
+ return True
+
+ def path(self, node, position, newline=False):
+ if not (self.p_squared(node, position, newline)
+ or self.p_default(node, position, newline)):
+ raise Error("Cannot serialize node '%s'"%(node, ))
+
+ def p_default(self, node, position, newline=False):
+ if position != SUBJECT and not newline:
+ self.write(' ')
+ self.write(self.label(node, position))
+ return True
+
+ def label(self, node, position):
+ if node == RDF.nil:
+ return '()'
+ if position is VERB and node in self.keywords:
+ return self.keywords[node]
+ if isinstance(node, Literal):
+ return node._literal_n3(use_plain=True,
+ qname_callback=lambda dt:
+ self.getQName(dt, _GEN_QNAME_FOR_DT))
+ else:
+ return self.getQName(node, position==VERB) or node.n3()
+
+ def p_squared(self, node, position, newline=False):
+ if (not isinstance(node, BNode)
+ or node in self._serialized
+ or self.refCount(node) > 1
+ or position == SUBJECT):
+ return False
+
+ if not newline:
+ self.write(' ')
+
+ if self.isValidList(node):
+ # this is a list
+ self.write('(')
+ self.depth += 1#2
+ self.doList(node)
+ self.depth -= 1#2
+ self.write(' )')
+ else:
+ self.subjectDone(node)
+ self.depth += 2
+ #self.write('[\n' + self.indent())
+ self.write('[')
+ self.depth -= 1
+ #self.predicateList(node, newline=True)
+ self.predicateList(node, newline=False)
+ #self.write('\n' + self.indent() + ']')
+ self.write(' ]')
+ self.depth -= 1
+
+ return True
+
+ def isValidList(self, l):
+ """
+ Checks if l is a valid RDF list, i.e. no nodes have other properties.
+ """
+ try:
+ if not self.store.value(l, RDF.first):
+ return False
+ except:
+ return False
+ while l:
+ if l != RDF.nil and len(
+ list(self.store.predicate_objects(l))) != 2:
+ return False
+ l = self.store.value(l, RDF.rest)
+ return True
+
+ def doList(self,l):
+ while l:
+ item = self.store.value(l, RDF.first)
+ if item:
+ self.path(item, OBJECT)
+ self.subjectDone(l)
+ l = self.store.value(l, RDF.rest)
+
+ def predicateList(self, subject, newline=False):
+ properties = self.buildPredicateHash(subject)
+ propList = self.sortProperties(properties)
+ if len(propList) == 0:
+ return
+ self.verb(propList[0], newline=newline)
+ self.objectList(properties[propList[0]])
+ for predicate in propList[1:]:
+ self.write(';\n' + self.indent(1))
+ self.verb(predicate, newline=True)
+ self.objectList(properties[predicate])
+
+ def verb(self, node, newline=False):
+ self.path(node, VERB, newline)
+
+ def objectList(self, objects):
+ count = len(objects)
+ if count == 0:
+ return
+ depthmod = (count == 1) and 0 or 1
+ self.depth += depthmod
+ self.path(objects[0], OBJECT)
+ for obj in objects[1:]:
+ self.write(',\n' + self.indent(1))
+ self.path(obj, OBJECT, newline=True)
+ self.depth -= depthmod
+
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py
new file mode 100644
index 0000000..d36af4b
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py
@@ -0,0 +1,103 @@
+import codecs
+from xml.sax.saxutils import quoteattr, escape
+
+__all__ = ['XMLWriter']
+
class XMLWriter(object):
    """Small streaming XML writer used by the XML-based serializers.

    Start tags are closed lazily so attributes may still be added after
    push(); pop() emits "/>" for empty elements or a full end tag.
    """

    def __init__(self, stream, namespace_manager, encoding=None, decl=1,
                 extra_ns=None):
        """
        :param stream: binary stream; wrapped in a codecs StreamWriter.
        :param namespace_manager: fallback provider for qname().
        :param encoding: output encoding (default utf-8).
        :param decl: when true, emit the <?xml ...?> declaration.
        :param extra_ns: {prefix: namespace} consulted before the manager.
        """
        encoding = encoding or 'utf-8'
        encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding)
        self.stream = stream = stream_writer(stream)
        if decl:
            stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)
        self.element_stack = []
        self.nm = namespace_manager
        # BUGFIX: the mutable default argument ({}) was shared across all
        # instances; use None as the sentinel instead.
        self.extra_ns = extra_ns if extra_ns is not None else {}
        self.closed = True  # is the current start tag closed?

    def __get_indent(self):
        return "  " * len(self.element_stack)
    indent = property(__get_indent)

    def __close_start_tag(self):
        if not self.closed: # TODO:
            self.closed = True
            self.stream.write(">")

    def push(self, uri):
        """Open a new element named by ``uri`` (start tag left open)."""
        self.__close_start_tag()
        write = self.stream.write
        write("\n")
        write(self.indent)
        write("<%s" % self.qname(uri))
        self.element_stack.append(uri)
        self.closed = False
        self.parent = False

    def pop(self, uri=None):
        """Close the innermost element; ``uri``, if given, must match it."""
        top = self.element_stack.pop()
        if uri:
            assert uri == top
        write = self.stream.write
        if not self.closed:
            self.closed = True
            write("/>")
        else:
            if self.parent:
                write("\n")
                write(self.indent)
            write("</%s>" % self.qname(top))
        self.parent = True

    def element(self, uri, content, attributes={}):
        """Utility method for adding a complete simple element"""
        self.push(uri)
        # .items() instead of the Python-2-only .iteritems(); works on
        # both interpreters.
        for k, v in attributes.items():
            self.attribute(k, v)
        self.text(content)
        self.pop()

    def namespaces(self, namespaces=None):
        """Write xmlns declarations for ``namespaces`` (default: the
        namespace manager's bindings) followed by extra_ns."""
        if not namespaces:
            namespaces = self.nm.namespaces()

        write = self.stream.write
        write("\n")
        for prefix, namespace in namespaces:
            if prefix:
                write('  xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write('  xmlns="%s"\n' % namespace)

        for prefix, namespace in self.extra_ns.items():
            if prefix:
                write('  xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write('  xmlns="%s"\n' % namespace)

    def attribute(self, uri, value):
        write = self.stream.write
        write(" %s=%s" % (self.qname(uri), quoteattr(value)))

    def text(self, text):
        """Write character data, wrapping in CDATA when it embeds markup."""
        self.__close_start_tag()
        if "<" in text and ">" in text and not "]]>" in text:
            self.stream.write("<![CDATA[")
            self.stream.write(text)
            self.stream.write("]]>")
        else:
            self.stream.write(escape(text))

    def qname(self, uri):
        """Compute qname for a uri using our extra namespaces,
        or the given namespace manager"""
        for pre, ns in self.extra_ns.items():
            if uri.startswith(ns):
                if pre != "":
                    # BUGFIX: str.join takes a single iterable argument;
                    # the original two-argument call raised TypeError for
                    # every non-empty extra prefix.
                    return "%s:%s" % (pre, uri[len(ns):])
                else:
                    return uri[len(ns):]

        return self.nm.qname(uri)
diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py~
new file mode 100644
index 0000000..d36af4b
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py~
@@ -0,0 +1,103 @@
+import codecs
+from xml.sax.saxutils import quoteattr, escape
+
+__all__ = ['XMLWriter']
+
+class XMLWriter(object):
+ def __init__(self, stream, namespace_manager, encoding=None, decl=1, extra_ns={}):
+ encoding = encoding or 'utf-8'
+ encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding)
+ self.stream = stream = stream_writer(stream)
+ if decl:
+ stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)
+ self.element_stack = []
+ self.nm = namespace_manager
+ self.extra_ns=extra_ns
+ self.closed = True
+
+ def __get_indent(self):
+ return " " * len(self.element_stack)
+ indent = property(__get_indent)
+
+ def __close_start_tag(self):
+ if not self.closed: # TODO:
+ self.closed = True
+ self.stream.write(">")
+
+ def push(self, uri):
+ self.__close_start_tag()
+ write = self.stream.write
+ write("\n")
+ write(self.indent)
+ write("<%s" % self.qname(uri))
+ self.element_stack.append(uri)
+ self.closed = False
+ self.parent = False
+
+ def pop(self, uri=None):
+ top = self.element_stack.pop()
+ if uri:
+ assert uri == top
+ write = self.stream.write
+ if not self.closed:
+ self.closed = True
+ write("/>")
+ else:
+ if self.parent:
+ write("\n")
+ write(self.indent)
+ write("</%s>" % self.qname(top))
+ self.parent = True
+
+ def element(self, uri, content, attributes={}):
+ """Utility method for adding a complete simple element"""
+ self.push(uri)
+ for k, v in attributes.iteritems():
+ self.attribute(k,v)
+ self.text(content)
+ self.pop()
+
+ def namespaces(self, namespaces=None):
+ if not namespaces:
+ namespaces=self.nm.namespaces()
+
+ write = self.stream.write
+ write("\n")
+ for prefix, namespace in namespaces:
+ if prefix:
+ write(' xmlns:%s="%s"\n' % (prefix, namespace))
+ else:
+ write(' xmlns="%s"\n' % namespace)
+
+ for prefix, namespace in self.extra_ns.items():
+ if prefix:
+ write(' xmlns:%s="%s"\n' % (prefix, namespace))
+ else:
+ write(' xmlns="%s"\n' % namespace)
+
+
+ def attribute(self, uri, value):
+ write = self.stream.write
+ write(" %s=%s" % (self.qname(uri), quoteattr(value)))
+
+ def text(self, text):
+ self.__close_start_tag()
+ if "<" in text and ">" in text and not "]]>" in text:
+ self.stream.write("<![CDATA[")
+ self.stream.write(text)
+ self.stream.write("]]>")
+ else:
+ self.stream.write(escape(text))
+
+ def qname(self,uri):
+ """Compute qname for a uri using our extra namespaces,
+ or the given namespace manager"""
+
+ for pre,ns in self.extra_ns.items():
+ if uri.startswith(ns):
+ if pre!="":
+ return ":".join(pre,uri[len(ns):])
+ else:
+ return uri[len(ns):]
+
+ return self.nm.qname(uri)
diff --git a/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py
new file mode 100644
index 0000000..282d4de
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py
@@ -0,0 +1,531 @@
+from rdflib_.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN
+from rdflib_.term import URIRef
+from rdflib_.py3compat import b
+def bb(u): return u.encode('utf-8')
+
+try:
+ from bsddb import db
+ has_bsddb = True
+except ImportError:
+ try:
+ from bsddb3 import db
+ has_bsddb = True
+ except ImportError:
+ has_bsddb = False
+from os import mkdir
+from os.path import exists, abspath
+from urllib import pathname2url
+from threading import Thread
+
+import logging
+_logger = logging.getLogger(__name__)
+
+__all__ = ['Sleepycat']
+
+class Sleepycat(Store):
+ context_aware = True
+ formula_aware = True
+ transaction_aware = False
+ db_env = None
+
    def __init__(self, configuration=None, identifier=None):
        """
        :param configuration: store directory path, forwarded to
            Store.__init__.
        :param identifier: URIRef naming this store; derived from the
            path in open() when not given.
        :raises Exception: when neither bsddb nor bsddb3 could be imported.
        """
        if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.")
        self.__open = False
        self.__identifier = identifier
        super(Sleepycat, self).__init__(configuration)
        # Cache the node pickler's (de)serialization helpers.
        self._loads = self.node_pickler.loads
        self._dumps = self.node_pickler.dumps
+
    def __get_identifier(self):
        # Read-only accessor backing the `identifier` property.
        return self.__identifier
    identifier = property(__get_identifier)
+
    def _init_db_environment(self, homeDir, create=True):
        """Create/open the Berkeley DB environment under ``homeDir``.

        Returns the DBEnv, or NO_STORE when the directory is missing and
        ``create`` is false.
        """
        envsetflags = db.DB_CDB_ALLDB
        envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD
        if not exists(homeDir):
            if create==True:
                mkdir(homeDir) # TODO: implement create method and refactor this to it
                self.create(homeDir)
            else:
                return NO_STORE
        db_env = db.DBEnv()
        db_env.set_cachesize(0, 1024*1024*50) # TODO
        #db_env.set_lg_max(1024*1024)
        db_env.set_flags(envsetflags, 1)
        db_env.open(homeDir, envflags | db.DB_CREATE)
        return db_env
+
    def is_open(self):
        """Return True while the store is open (toggled by open()/close())."""
        return self.__open
+
    def open(self, path, create=True):
        """Open (creating if requested) the store at ``path``.

        Opens the three triple orderings (c,s,p,o / c,p,o,s / c,o,s,p),
        the context/namespace/prefix maps and the key<->int maps, builds
        the lookup table used to answer any triple pattern, and starts a
        background sync thread.  Returns VALID_STORE or NO_STORE.
        """
        if not has_bsddb: return NO_STORE
        homeDir = path

        if self.__identifier is None:
            # Default identifier: file URL of the store directory.
            self.__identifier = URIRef(pathname2url(abspath(homeDir)))

        db_env = self._init_db_environment(homeDir, create)
        if db_env == NO_STORE:
            return NO_STORE
        self.db_env = db_env
        self.__open = True

        dbname = None
        dbtype = db.DB_BTREE
        # auto-commit ensures that the open-call commits when transactions are enabled
        dbopenflags = db.DB_THREAD
        if self.transaction_aware == True:
            dbopenflags |= db.DB_AUTO_COMMIT

        dbmode = 0660
        dbsetflags = 0

        # create and open the DBs
        self.__indicies = [None,] * 3
        self.__indicies_info = [None,] * 3
        for i in xrange(0, 3):
            # Index i stores keys with the triple rotated so position i
            # comes first (after the context).
            index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode()
            index = db.DB(db_env)
            index.set_flags(dbsetflags)
            index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
            self.__indicies[i] = index
            self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i))

        # For each of the 8 bound/unbound pattern shapes, pick the index
        # with the longest contiguous run of bound positions.
        lookup = {}
        for i in xrange(0, 8):
            results = []
            for start in xrange(0, 3):
                score = 1
                # NOTE(review): 'len' shadows the builtin inside this loop.
                len = 0
                for j in xrange(start, start+3):
                    if i & (1<<(j%3)):
                        score = score << 1
                        len += 1
                    else:
                        break
                tie_break = 2-start
                results.append(((score, tie_break), start, len))

            results.sort()
            score, start, len = results[-1]

            def get_prefix_func(start, end):
                # Yield the key components (context, then the bound run)
                # forming the search prefix for this pattern shape.
                def get_prefix(triple, context):
                    if context is None:
                        yield ""
                    else:
                        yield context
                    i = start
                    while i<end:
                        yield triple[i%3]
                        i += 1
                    yield ""
                return get_prefix

            lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string))


        self.__lookup_dict = lookup

        # Map of all named contexts (context key -> "").
        self.__contexts = db.DB(db_env)
        self.__contexts.set_flags(dbsetflags)
        self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        # namespace <-> prefix maps.
        self.__namespace = db.DB(db_env)
        self.__namespace.set_flags(dbsetflags)
        self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        self.__prefix = db.DB(db_env)
        self.__prefix.set_flags(dbsetflags)
        self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        # pickled-term key <-> integer id maps.
        self.__k2i = db.DB(db_env)
        self.__k2i.set_flags(dbsetflags)
        self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode)

        self.__i2k = db.DB(db_env)
        self.__i2k.set_flags(dbsetflags)
        self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode)

        self.__needs_sync = False
        # Daemon thread that periodically flushes the databases to disk.
        t = Thread(target=self.__sync_run)
        t.setDaemon(True)
        t.start()
        self.__sync_thread = t
        return VALID_STORE
+
+
    def __sync_run(self):
        """Background thread body: debounce dirty flags and call sync().

        Once a write marks the store dirty, waits until writes have been
        quiet for min_seconds (or max_seconds since the first unsynced
        write), then flushes all databases.  Exits when the store closes.
        """
        from time import sleep, time
        try:
            min_seconds, max_seconds = 10, 300
            while self.__open:
                if self.__needs_sync:
                    t0 = t1 = time()
                    self.__needs_sync = False
                    while self.__open:
                        sleep(.1)
                        if self.__needs_sync:
                            # Another write arrived; restart the quiet timer.
                            t1 = time()
                            self.__needs_sync = False
                        if time()-t1 > min_seconds or time()-t0 > max_seconds:
                            self.__needs_sync = False
                            _logger.debug("sync")
                            self.sync()
                            break
                else:
                    sleep(1)
        # NOTE: Python 2 except syntax.
        except Exception, e:
            _logger.exception(e)
+
    def sync(self):
        """Flush every open database to disk (no-op once closed)."""
        if self.__open:
            for i in self.__indicies:
                i.sync()
            self.__contexts.sync()
            self.__namespace.sync()
            self.__prefix.sync()
            self.__i2k.sync()
            self.__k2i.sync()
+
    def close(self, commit_pending_transaction=False):
        """Stop the sync thread, then close all databases and the env.

        :param commit_pending_transaction: accepted for Store interface
            compatibility; unused here (transactions are disabled).
        """
        self.__open = False
        # Clearing __open above makes the sync thread leave its loop.
        self.__sync_thread.join()
        for i in self.__indicies:
            i.close()
        self.__contexts.close()
        self.__namespace.close()
        self.__prefix.close()
        self.__i2k.close()
        self.__k2i.close()
        self.db_env.close()
+
    # NOTE: Python 2 tuple-unpacking parameter syntax.
    def add(self, (subject, predicate, object), context, quoted=False, txn=None):
        """\
        Add a triple to the store of triples.
        """
        assert self.__open, "The Store must be open."
        assert context!=self, "Can not add triple directly to store"
        Store.add(self, (subject, predicate, object), context, quoted)

        _to_string = self._to_string

        # Encode each term to its string key form.
        s = _to_string(subject, txn=txn)
        p = _to_string(predicate, txn=txn)
        o = _to_string(object, txn=txn)
        c = _to_string(context, txn=txn)

        cspo, cpos, cosp = self.__indicies

        value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
        if value is None:
            # First occurrence of this triple in this context; register
            # the context itself.
            self.__contexts.put(bb(c), "", txn=txn)

        # The empty-context ("") row is the conjunctive index; its value
        # lists every named context holding this triple, '^'-separated.
        contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("")
        contexts = set(contexts_value.split(b("^")))
        contexts.add(bb(c))
        contexts_value = b("^").join(contexts)
        assert contexts_value!=None

        # Write the triple into all three orderings for this context.
        cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn)
        cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn)
        cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn)
        if not quoted:
            cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn)
            cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn)
            cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn)

        # Wake the background sync thread.
        self.__needs_sync = True
+
    # NOTE: Python 2 tuple-unpacking parameter syntax.  s/p/o/c arrive
    # here as already-encoded key strings, not as Node objects.
    def __remove(self, (s, p, o), c, quoted=False, txn=None):
        cspo, cpos, cosp = self.__indicies
        # Drop context c from the conjunctive row's context list.
        contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("")
        contexts = set(contexts_value.split(b("^")))
        contexts.discard(c)
        contexts_value = b("^").join(contexts)
        # Delete the triple's row from all three orderings for context c.
        for i, _to_key, _from_key in self.__indicies_info:
            i.delete(_to_key((s, p, o), c), txn=txn)
        if not quoted:
            if contexts_value:
                # Other contexts still hold the triple; rewrite the
                # conjunctive row with the shrunken context list.
                for i, _to_key, _from_key in self.__indicies_info:
                    i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn)
            else:
                # No context holds the triple any more; remove the
                # conjunctive row as well.
                for i, _to_key, _from_key in self.__indicies_info:
                    try:
                        i.delete(_to_key((s, p, o), b("")), txn=txn)
                    except db.DBNotFoundError, e:
                        pass # TODO: is it okay to ignore these?
+
+ def remove(self, (subject, predicate, object), context, txn=None):
+ assert self.__open, "The Store must be open."
+ Store.remove(self, (subject, predicate, object), context)
+ _to_string = self._to_string
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ if subject is not None and predicate is not None and object is not None and context is not None:
+ s = _to_string(subject, txn=txn)
+ p = _to_string(predicate, txn=txn)
+ o = _to_string(object, txn=txn)
+ c = _to_string(context, txn=txn)
+ value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
+ if value is not None:
+ self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn)
+ self.__needs_sync = True
+ else:
+ cspo, cpos, cosp = self.__indicies
+ index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
+
+ cursor = index.cursor(txn=txn)
+ try:
+ current = cursor.set_range(prefix)
+ needs_sync = True
+ except db.DBNotFoundError:
+ current = None
+ needs_sync = False
+ cursor.close()
+ while current:
+ key, value = current
+ cursor = index.cursor(txn=txn)
+ try:
+ cursor.set_range(key)
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ if key.startswith(prefix):
+ c, s, p, o = from_key(key)
+ if context is None:
+ contexts_value = index.get(key, txn=txn) or b("")
+ contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts
+ contexts.add(b("")) # and from the conjunctive index
+ for c in contexts:
+ for i, _to_key, _ in self.__indicies_info:
+ i.delete(_to_key((s, p, o), c), txn=txn)
+ else:
+ self.__remove((s, p, o), c, txn=txn)
+ else:
+ break
+
+ if context is not None:
+ if subject is None and predicate is None and object is None:
+ # TODO: also if context becomes empty and not just on remove((None, None, None), c)
+ try:
+ self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn)
+ except db.DBNotFoundError, e:
+ pass
+
+ self.__needs_sync = needs_sync
+
+ def triples(self, (subject, predicate, object), context=None, txn=None):
+ """A generator over all the triples matching """
+ assert self.__open, "The Store must be open."
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ _from_string = self._from_string
+ index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
+
+ cursor = index.cursor(txn=txn)
+ try:
+ current = cursor.set_range(prefix)
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ while current:
+ key, value = current
+ cursor = index.cursor(txn=txn)
+ try:
+ cursor.set_range(key)
+ # Cheap hack so 2to3 doesn't convert to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ if key and key.startswith(prefix):
+ contexts_value = index.get(key, txn=txn)
+ yield results_from_key(key, subject, predicate, object, contexts_value)
+ else:
+ break
+
+ def __len__(self, context=None):
+ assert self.__open, "The Store must be open."
+ if context is not None:
+ if context == self:
+ context = None
+
+ if context is None:
+ prefix = b("^")
+ else:
+ prefix = bb("%s^" % self._to_string(context))
+
+ index = self.__indicies[0]
+ cursor = index.cursor()
+ current = cursor.set_range(prefix)
+ count = 0
+ while current:
+ key, value = current
+ if key.startswith(prefix):
+ count +=1
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ else:
+ break
+ cursor.close()
+ return count
+
+ def bind(self, prefix, namespace):
+ prefix = prefix.encode("utf-8")
+ namespace = namespace.encode("utf-8")
+ bound_prefix = self.__prefix.get(namespace)
+ if bound_prefix:
+ self.__namespace.delete(bound_prefix)
+ self.__prefix[namespace] = prefix
+ self.__namespace[prefix] = namespace
+
+ def namespace(self, prefix):
+ prefix = prefix.encode("utf-8")
+ ns = self.__namespace.get(prefix, None)
+ if ns is not None:
+ return ns.decode('utf-8')
+ return None
+
+ def prefix(self, namespace):
+ namespace = namespace.encode("utf-8")
+ prefix = self.__prefix.get(namespace, None)
+ if prefix is not None:
+ return prefix.decode('utf-8')
+ return None
+
+ def namespaces(self):
+ cursor = self.__namespace.cursor()
+ results = []
+ current = cursor.first()
+ while current:
+ prefix, namespace = current
+ results.append((prefix.decode('utf-8'), namespace.decode('utf-8')))
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ cursor.close()
+ for prefix, namespace in results:
+ yield prefix, URIRef(namespace)
+
+ def contexts(self, triple=None):
+ _from_string = self._from_string
+ _to_string = self._to_string
+
+ if triple:
+ s, p, o = triple
+ s = _to_string(s)
+ p = _to_string(p)
+ o = _to_string(o)
+ contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o)))
+ if contexts:
+ for c in contexts.split(b("^")):
+ if c:
+ yield _from_string(c)
+ else:
+ index = self.__contexts
+ cursor = index.cursor()
+ current = cursor.first()
+ cursor.close()
+ while current:
+ key, value = current
+ context = _from_string(key)
+ yield context
+ cursor = index.cursor()
+ try:
+ cursor.set_range(key)
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+
+ def _from_string(self, i):
+ k = self.__i2k.get(int(i))
+ return self._loads(k)
+
+ def _to_string(self, term, txn=None):
+ k = self._dumps(term)
+ i = self.__k2i.get(k, txn=txn)
+ if i is None:
+ # weird behavoir from bsddb not taking a txn as a keyword argument
+ # for append
+ if self.transaction_aware:
+ i = "%s" % self.__i2k.append(k, txn)
+ else:
+ i = "%s" % self.__i2k.append(k)
+
+ self.__k2i.put(k, i, txn=txn)
+ else:
+ i = i.decode()
+ return i
+
+ def __lookup(self, (subject, predicate, object), context, txn=None):
+ _to_string = self._to_string
+ if context is not None:
+ context = _to_string(context, txn=txn)
+ i = 0
+ if subject is not None:
+ i += 1
+ subject = _to_string(subject, txn=txn)
+ if predicate is not None:
+ i += 2
+ predicate = _to_string(predicate, txn=txn)
+ if object is not None:
+ i += 4
+ object = _to_string(object, txn=txn)
+ index, prefix_func, from_key, results_from_key = self.__lookup_dict[i]
+ #print (subject, predicate, object), context, prefix_func, index #DEBUG
+ prefix = bb("^".join(prefix_func((subject, predicate, object), context)))
+ return index, prefix, from_key, results_from_key
+
+
+def to_key_func(i):
+ def to_key(triple, context):
+ "Takes a string; returns key"
+ return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^
+ return to_key
+
+def from_key_func(i):
+ def from_key(key):
+ "Takes a key; returns string"
+ parts = key.split(b("^"))
+ return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1]
+ return from_key
+
+def results_from_key_func(i, from_string):
+ def from_key(key, subject, predicate, object, contexts_value):
+ "Takes a key and subject, predicate, object; returns tuple for yield"
+ parts = key.split(b("^"))
+ if subject is None:
+ # TODO: i & 1: # dis assemble and/or measure to see which is faster
+ # subject is None or i & 1
+ s = from_string(parts[(3-i+0)%3+1])
+ else:
+ s = subject
+ if predicate is None:#i & 2:
+ p = from_string(parts[(3-i+1)%3+1])
+ else:
+ p = predicate
+ if object is None:#i & 4:
+ o = from_string(parts[(3-i+2)%3+1])
+ else:
+ o = object
+ return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c)
+ return from_key
+
+def readable_index(i):
+ s, p, o = "?" * 3
+ if i & 1: s = "s"
+ if i & 2: p = "p"
+ if i & 4: o = "o"
+ return "%s,%s,%s" % (s, p, o)
diff --git a/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~
new file mode 100644
index 0000000..67fcc17
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~
@@ -0,0 +1,531 @@
+from rdflib.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN
+from rdflib.term import URIRef
+from rdflib.py3compat import b
+def bb(u): return u.encode('utf-8')
+
+try:
+ from bsddb import db
+ has_bsddb = True
+except ImportError:
+ try:
+ from bsddb3 import db
+ has_bsddb = True
+ except ImportError:
+ has_bsddb = False
+from os import mkdir
+from os.path import exists, abspath
+from urllib import pathname2url
+from threading import Thread
+
+import logging
+_logger = logging.getLogger(__name__)
+
+__all__ = ['Sleepycat']
+
+class Sleepycat(Store):
+ context_aware = True
+ formula_aware = True
+ transaction_aware = False
+ db_env = None
+
+ def __init__(self, configuration=None, identifier=None):
+ if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.")
+ self.__open = False
+ self.__identifier = identifier
+ super(Sleepycat, self).__init__(configuration)
+ self._loads = self.node_pickler.loads
+ self._dumps = self.node_pickler.dumps
+
+ def __get_identifier(self):
+ return self.__identifier
+ identifier = property(__get_identifier)
+
+ def _init_db_environment(self, homeDir, create=True):
+ envsetflags = db.DB_CDB_ALLDB
+ envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD
+ if not exists(homeDir):
+ if create==True:
+ mkdir(homeDir) # TODO: implement create method and refactor this to it
+ self.create(homeDir)
+ else:
+ return NO_STORE
+ db_env = db.DBEnv()
+ db_env.set_cachesize(0, 1024*1024*50) # TODO
+ #db_env.set_lg_max(1024*1024)
+ db_env.set_flags(envsetflags, 1)
+ db_env.open(homeDir, envflags | db.DB_CREATE)
+ return db_env
+
+ def is_open(self):
+ return self.__open
+
+ def open(self, path, create=True):
+ if not has_bsddb: return NO_STORE
+ homeDir = path
+
+ if self.__identifier is None:
+ self.__identifier = URIRef(pathname2url(abspath(homeDir)))
+
+ db_env = self._init_db_environment(homeDir, create)
+ if db_env == NO_STORE:
+ return NO_STORE
+ self.db_env = db_env
+ self.__open = True
+
+ dbname = None
+ dbtype = db.DB_BTREE
+ # auto-commit ensures that the open-call commits when transactions are enabled
+ dbopenflags = db.DB_THREAD
+ if self.transaction_aware == True:
+ dbopenflags |= db.DB_AUTO_COMMIT
+
+ dbmode = 0660
+ dbsetflags = 0
+
+ # create and open the DBs
+ self.__indicies = [None,] * 3
+ self.__indicies_info = [None,] * 3
+ for i in xrange(0, 3):
+ index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode()
+ index = db.DB(db_env)
+ index.set_flags(dbsetflags)
+ index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
+ self.__indicies[i] = index
+ self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i))
+
+ lookup = {}
+ for i in xrange(0, 8):
+ results = []
+ for start in xrange(0, 3):
+ score = 1
+ len = 0
+ for j in xrange(start, start+3):
+ if i & (1<<(j%3)):
+ score = score << 1
+ len += 1
+ else:
+ break
+ tie_break = 2-start
+ results.append(((score, tie_break), start, len))
+
+ results.sort()
+ score, start, len = results[-1]
+
+ def get_prefix_func(start, end):
+ def get_prefix(triple, context):
+ if context is None:
+ yield ""
+ else:
+ yield context
+ i = start
+ while i<end:
+ yield triple[i%3]
+ i += 1
+ yield ""
+ return get_prefix
+
+ lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string))
+
+
+ self.__lookup_dict = lookup
+
+ self.__contexts = db.DB(db_env)
+ self.__contexts.set_flags(dbsetflags)
+ self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
+
+ self.__namespace = db.DB(db_env)
+ self.__namespace.set_flags(dbsetflags)
+ self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
+
+ self.__prefix = db.DB(db_env)
+ self.__prefix.set_flags(dbsetflags)
+ self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
+
+ self.__k2i = db.DB(db_env)
+ self.__k2i.set_flags(dbsetflags)
+ self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode)
+
+ self.__i2k = db.DB(db_env)
+ self.__i2k.set_flags(dbsetflags)
+ self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode)
+
+ self.__needs_sync = False
+ t = Thread(target=self.__sync_run)
+ t.setDaemon(True)
+ t.start()
+ self.__sync_thread = t
+ return VALID_STORE
+
+
+ def __sync_run(self):
+ from time import sleep, time
+ try:
+ min_seconds, max_seconds = 10, 300
+ while self.__open:
+ if self.__needs_sync:
+ t0 = t1 = time()
+ self.__needs_sync = False
+ while self.__open:
+ sleep(.1)
+ if self.__needs_sync:
+ t1 = time()
+ self.__needs_sync = False
+ if time()-t1 > min_seconds or time()-t0 > max_seconds:
+ self.__needs_sync = False
+ _logger.debug("sync")
+ self.sync()
+ break
+ else:
+ sleep(1)
+ except Exception, e:
+ _logger.exception(e)
+
+ def sync(self):
+ if self.__open:
+ for i in self.__indicies:
+ i.sync()
+ self.__contexts.sync()
+ self.__namespace.sync()
+ self.__prefix.sync()
+ self.__i2k.sync()
+ self.__k2i.sync()
+
+ def close(self, commit_pending_transaction=False):
+ self.__open = False
+ self.__sync_thread.join()
+ for i in self.__indicies:
+ i.close()
+ self.__contexts.close()
+ self.__namespace.close()
+ self.__prefix.close()
+ self.__i2k.close()
+ self.__k2i.close()
+ self.db_env.close()
+
+ def add(self, (subject, predicate, object), context, quoted=False, txn=None):
+ """\
+ Add a triple to the store of triples.
+ """
+ assert self.__open, "The Store must be open."
+ assert context!=self, "Can not add triple directly to store"
+ Store.add(self, (subject, predicate, object), context, quoted)
+
+ _to_string = self._to_string
+
+ s = _to_string(subject, txn=txn)
+ p = _to_string(predicate, txn=txn)
+ o = _to_string(object, txn=txn)
+ c = _to_string(context, txn=txn)
+
+ cspo, cpos, cosp = self.__indicies
+
+ value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
+ if value is None:
+ self.__contexts.put(bb(c), "", txn=txn)
+
+ contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("")
+ contexts = set(contexts_value.split(b("^")))
+ contexts.add(bb(c))
+ contexts_value = b("^").join(contexts)
+ assert contexts_value!=None
+
+ cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn)
+ cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn)
+ cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn)
+ if not quoted:
+ cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn)
+ cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn)
+ cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn)
+
+ self.__needs_sync = True
+
+ def __remove(self, (s, p, o), c, quoted=False, txn=None):
+ cspo, cpos, cosp = self.__indicies
+ contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("")
+ contexts = set(contexts_value.split(b("^")))
+ contexts.discard(c)
+ contexts_value = b("^").join(contexts)
+ for i, _to_key, _from_key in self.__indicies_info:
+ i.delete(_to_key((s, p, o), c), txn=txn)
+ if not quoted:
+ if contexts_value:
+ for i, _to_key, _from_key in self.__indicies_info:
+ i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn)
+ else:
+ for i, _to_key, _from_key in self.__indicies_info:
+ try:
+ i.delete(_to_key((s, p, o), b("")), txn=txn)
+ except db.DBNotFoundError, e:
+ pass # TODO: is it okay to ignore these?
+
+ def remove(self, (subject, predicate, object), context, txn=None):
+ assert self.__open, "The Store must be open."
+ Store.remove(self, (subject, predicate, object), context)
+ _to_string = self._to_string
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ if subject is not None and predicate is not None and object is not None and context is not None:
+ s = _to_string(subject, txn=txn)
+ p = _to_string(predicate, txn=txn)
+ o = _to_string(object, txn=txn)
+ c = _to_string(context, txn=txn)
+ value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
+ if value is not None:
+ self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn)
+ self.__needs_sync = True
+ else:
+ cspo, cpos, cosp = self.__indicies
+ index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
+
+ cursor = index.cursor(txn=txn)
+ try:
+ current = cursor.set_range(prefix)
+ needs_sync = True
+ except db.DBNotFoundError:
+ current = None
+ needs_sync = False
+ cursor.close()
+ while current:
+ key, value = current
+ cursor = index.cursor(txn=txn)
+ try:
+ cursor.set_range(key)
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ if key.startswith(prefix):
+ c, s, p, o = from_key(key)
+ if context is None:
+ contexts_value = index.get(key, txn=txn) or b("")
+ contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts
+ contexts.add(b("")) # and from the conjunctive index
+ for c in contexts:
+ for i, _to_key, _ in self.__indicies_info:
+ i.delete(_to_key((s, p, o), c), txn=txn)
+ else:
+ self.__remove((s, p, o), c, txn=txn)
+ else:
+ break
+
+ if context is not None:
+ if subject is None and predicate is None and object is None:
+ # TODO: also if context becomes empty and not just on remove((None, None, None), c)
+ try:
+ self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn)
+ except db.DBNotFoundError, e:
+ pass
+
+ self.__needs_sync = needs_sync
+
+ def triples(self, (subject, predicate, object), context=None, txn=None):
+ """A generator over all the triples matching """
+ assert self.__open, "The Store must be open."
+
+ if context is not None:
+ if context == self:
+ context = None
+
+ _from_string = self._from_string
+ index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
+
+ cursor = index.cursor(txn=txn)
+ try:
+ current = cursor.set_range(prefix)
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ while current:
+ key, value = current
+ cursor = index.cursor(txn=txn)
+ try:
+ cursor.set_range(key)
+ # Cheap hack so 2to3 doesn't convert to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+ if key and key.startswith(prefix):
+ contexts_value = index.get(key, txn=txn)
+ yield results_from_key(key, subject, predicate, object, contexts_value)
+ else:
+ break
+
+ def __len__(self, context=None):
+ assert self.__open, "The Store must be open."
+ if context is not None:
+ if context == self:
+ context = None
+
+ if context is None:
+ prefix = b("^")
+ else:
+ prefix = bb("%s^" % self._to_string(context))
+
+ index = self.__indicies[0]
+ cursor = index.cursor()
+ current = cursor.set_range(prefix)
+ count = 0
+ while current:
+ key, value = current
+ if key.startswith(prefix):
+ count +=1
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ else:
+ break
+ cursor.close()
+ return count
+
+ def bind(self, prefix, namespace):
+ prefix = prefix.encode("utf-8")
+ namespace = namespace.encode("utf-8")
+ bound_prefix = self.__prefix.get(namespace)
+ if bound_prefix:
+ self.__namespace.delete(bound_prefix)
+ self.__prefix[namespace] = prefix
+ self.__namespace[prefix] = namespace
+
+ def namespace(self, prefix):
+ prefix = prefix.encode("utf-8")
+ ns = self.__namespace.get(prefix, None)
+ if ns is not None:
+ return ns.decode('utf-8')
+ return None
+
+ def prefix(self, namespace):
+ namespace = namespace.encode("utf-8")
+ prefix = self.__prefix.get(namespace, None)
+ if prefix is not None:
+ return prefix.decode('utf-8')
+ return None
+
+ def namespaces(self):
+ cursor = self.__namespace.cursor()
+ results = []
+ current = cursor.first()
+ while current:
+ prefix, namespace = current
+ results.append((prefix.decode('utf-8'), namespace.decode('utf-8')))
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ cursor.close()
+ for prefix, namespace in results:
+ yield prefix, URIRef(namespace)
+
+ def contexts(self, triple=None):
+ _from_string = self._from_string
+ _to_string = self._to_string
+
+ if triple:
+ s, p, o = triple
+ s = _to_string(s)
+ p = _to_string(p)
+ o = _to_string(o)
+ contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o)))
+ if contexts:
+ for c in contexts.split(b("^")):
+ if c:
+ yield _from_string(c)
+ else:
+ index = self.__contexts
+ cursor = index.cursor()
+ current = cursor.first()
+ cursor.close()
+ while current:
+ key, value = current
+ context = _from_string(key)
+ yield context
+ cursor = index.cursor()
+ try:
+ cursor.set_range(key)
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, 'next')()
+ except db.DBNotFoundError:
+ current = None
+ cursor.close()
+
+ def _from_string(self, i):
+ k = self.__i2k.get(int(i))
+ return self._loads(k)
+
+ def _to_string(self, term, txn=None):
+ k = self._dumps(term)
+ i = self.__k2i.get(k, txn=txn)
+ if i is None:
+ # weird behavoir from bsddb not taking a txn as a keyword argument
+ # for append
+ if self.transaction_aware:
+ i = "%s" % self.__i2k.append(k, txn)
+ else:
+ i = "%s" % self.__i2k.append(k)
+
+ self.__k2i.put(k, i, txn=txn)
+ else:
+ i = i.decode()
+ return i
+
+ def __lookup(self, (subject, predicate, object), context, txn=None):
+ _to_string = self._to_string
+ if context is not None:
+ context = _to_string(context, txn=txn)
+ i = 0
+ if subject is not None:
+ i += 1
+ subject = _to_string(subject, txn=txn)
+ if predicate is not None:
+ i += 2
+ predicate = _to_string(predicate, txn=txn)
+ if object is not None:
+ i += 4
+ object = _to_string(object, txn=txn)
+ index, prefix_func, from_key, results_from_key = self.__lookup_dict[i]
+ #print (subject, predicate, object), context, prefix_func, index #DEBUG
+ prefix = bb("^".join(prefix_func((subject, predicate, object), context)))
+ return index, prefix, from_key, results_from_key
+
+
+def to_key_func(i):
+ def to_key(triple, context):
+ "Takes a string; returns key"
+ return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^
+ return to_key
+
+def from_key_func(i):
+ def from_key(key):
+ "Takes a key; returns string"
+ parts = key.split(b("^"))
+ return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1]
+ return from_key
+
+def results_from_key_func(i, from_string):
+ def from_key(key, subject, predicate, object, contexts_value):
+ "Takes a key and subject, predicate, object; returns tuple for yield"
+ parts = key.split(b("^"))
+ if subject is None:
+ # TODO: i & 1: # dis assemble and/or measure to see which is faster
+ # subject is None or i & 1
+ s = from_string(parts[(3-i+0)%3+1])
+ else:
+ s = subject
+ if predicate is None:#i & 2:
+ p = from_string(parts[(3-i+1)%3+1])
+ else:
+ p = predicate
+ if object is None:#i & 4:
+ o = from_string(parts[(3-i+2)%3+1])
+ else:
+ o = object
+ return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c)
+ return from_key
+
+def readable_index(i):
+ s, p, o = "?" * 3
+ if i & 1: s = "s"
+ if i & 2: p = "p"
+ if i & 4: o = "o"
+ return "%s,%s,%s" % (s, p, o)