diff options
Diffstat (limited to 'creactistore/_templates/lib/rdflib_/plugins')
48 files changed, 13856 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib_/plugins/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/__init__.py new file mode 100644 index 0000000..4622bb0 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/__init__.py @@ -0,0 +1,7 @@ +""" +Default plugins for rdflib. + +This is a namespace package and contains the default plugins for +rdflib. + +""" diff --git a/creactistore/_templates/lib/rdflib_/plugins/memory.py b/creactistore/_templates/lib/rdflib_/plugins/memory.py new file mode 100644 index 0000000..a9d6fad --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/memory.py @@ -0,0 +1,563 @@ +from __future__ import generators +from rdflib_.term import BNode +from rdflib_.store import Store, NO_STORE, VALID_STORE + +__all__ = ['Memory', 'IOMemory'] + +ANY = Any = None + +class Memory(Store): + """\ + An in memory implementation of a triple store. + + This triple store uses nested dictionaries to store triples. Each + triple is stored in two such indices as follows spo[s][p][o] = 1 and + pos[p][o][s] = 1. + + Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser + """ + def __init__(self, configuration=None, identifier=None): + super(Memory, self).__init__(configuration) + self.identifier = identifier + + # indexed by [subject][predicate][object] + self.__spo = {} + + # indexed by [predicate][object][subject] + self.__pos = {} + + # indexed by [predicate][object][subject] + self.__osp = {} + + self.__namespace = {} + self.__prefix = {} + + def add(self, (subject, predicate, object), context, quoted=False): + """\ + Add a triple to the store of triples. + """ + # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] + # = 1, creating the nested dictionaries where they do not yet + # exits. 
+ spo = self.__spo + try: + po = spo[subject] + except: + po = spo[subject] = {} + try: + o = po[predicate] + except: + o = po[predicate] = {} + o[object] = 1 + + pos = self.__pos + try: + os = pos[predicate] + except: + os = pos[predicate] = {} + try: + s = os[object] + except: + s = os[object] = {} + s[subject] = 1 + + osp = self.__osp + try: + sp = osp[object] + except: + sp = osp[object] = {} + try: + p = sp[subject] + except: + p = sp[subject] = {} + p[predicate] = 1 + + def remove(self, (subject, predicate, object), context=None): + for (subject, predicate, object), c in self.triples( + (subject, predicate, object)): + del self.__spo[subject][predicate][object] + del self.__pos[predicate][object][subject] + del self.__osp[object][subject][predicate] + + def triples(self, (subject, predicate, object), context=None): + """A generator over all the triples matching """ + if subject!=ANY: # subject is given + spo = self.__spo + if subject in spo: + subjectDictionary = spo[subject] + if predicate!=ANY: # subject+predicate is given + if predicate in subjectDictionary: + if object!=ANY: # subject+predicate+object is given + if object in subjectDictionary[predicate]: + yield (subject, predicate, object), \ + self.__contexts() + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in subjectDictionary[predicate].keys(): + yield (subject, predicate, o), \ + self.__contexts() + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in subjectDictionary.keys(): + if object!=ANY: # object is given + if object in subjectDictionary[p]: + yield (subject, p, object), self.__contexts() + else: # given object not found + pass + else: # object unbound + for o in subjectDictionary[p].keys(): + yield (subject, p, o), self.__contexts() + else: # given subject not found + pass + elif predicate!=ANY: # predicate is given, subject unbound + pos = self.__pos + if predicate in pos: + predicateDictionary = 
pos[predicate] + if object!=ANY: # predicate+object is given, subject unbound + if object in predicateDictionary: + for s in predicateDictionary[object].keys(): + yield (s, predicate, object), self.__contexts() + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in predicateDictionary.keys(): + for s in predicateDictionary[o].keys(): + yield (s, predicate, o), self.__contexts() + elif object!=ANY: # object is given, subject+predicate unbound + osp = self.__osp + if object in osp: + objectDictionary = osp[object] + for s in objectDictionary.keys(): + for p in objectDictionary[s].keys(): + yield (s, p, object), self.__contexts() + else: # subject+predicate+object unbound + spo = self.__spo + for s in spo.keys(): + subjectDictionary = spo[s] + for p in subjectDictionary.keys(): + for o in subjectDictionary[p].keys(): + yield (s, p, o), self.__contexts() + + def __len__(self, context=None): + #@@ optimize + i = 0 + for triple in self.triples((None, None, None)): + i += 1 + return i + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.iteritems(): + yield prefix, namespace + + def __contexts(self): + return (c for c in []) # TODO: best way to return empty generator + +class IOMemory(Store): + """\ + An integer-key-optimized-context-aware-in-memory store. + + Uses nested dictionaries to store triples and context. Each triple + is stored in six such indices as follows cspo[c][s][p][o] = 1 + and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as + spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c] + + Context information is used to track the 'source' of the triple + data for merging, unmerging, remerging purposes. 
context aware + store stores consume more memory size than non context stores. + + """ + + context_aware = True + formula_aware = True + + def __init__(self, configuration=None, identifier=None): + super(IOMemory, self).__init__() + + # indexed by [context][subject][predicate][object] = 1 + self.cspo = self.createIndex() + + # indexed by [context][predicate][object][subject] = 1 + self.cpos = self.createIndex() + + # indexed by [context][object][subject][predicate] = 1 + self.cosp = self.createIndex() + + # indexed by [subject][predicate][object] = [context] + self.spo = self.createIndex() + + # indexed by [predicate][object][subject] = [context] + self.pos = self.createIndex() + + # indexed by [object][subject][predicate] = [context] + self.osp = self.createIndex() + + # indexes integer keys to identifiers + self.forward = self.createForward() + + # reverse index of forward + self.reverse = self.createReverse() + + self.identifier = identifier or BNode() + + self.__namespace = self.createPrefixMap() + self.__prefix = self.createPrefixMap() + + def open(self, configuration, create=False): + if not create: + # An IOMemory Store never exists. + return NO_STORE + else: + return VALID_STORE + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.iteritems(): + yield prefix, namespace + + def defaultContext(self): + return self.default_context + + def addContext(self, context): + """ Add context w/o adding statement. Dan you can remove this if you want """ + + if not self.reverse.has_key(context): + ci=randid() + while not self.forward.insert(ci, context): + ci=randid() + self.reverse[context] = ci + + def intToIdentifier(self, (si, pi, oi)): + """ Resolve an integer triple into identifers. 
""" + return (self.forward[si], self.forward[pi], self.forward[oi]) + + def identifierToInt(self, (s, p, o)): + """ Resolve an identifier triple into integers. """ + return (self.reverse[s], self.reverse[p], self.reverse[o]) + + def uniqueSubjects(self, context=None): + if context is None: + index = self.spo + else: + index = self.cspo[context] + for si in index.keys(): + yield self.forward[si] + + def uniquePredicates(self, context=None): + if context is None: + index = self.pos + else: + index = self.cpos[context] + for pi in index.keys(): + yield self.forward[pi] + + def uniqueObjects(self, context=None): + if context is None: + index = self.osp + else: + index = self.cosp[context] + for oi in index.keys(): + yield self.forward[oi] + + def createForward(self): + return {} + + def createReverse(self): + return {} + + def createIndex(self): + return {} + + def createPrefixMap(self): + return {} + + def add(self, triple, context, quoted=False): + """\ + Add a triple to the store. + """ + Store.add(self, triple, context, quoted) + for triple, cg in self.triples(triple, context): + #triple is already in the store. + return + + subject, predicate, object = triple + + f = self.forward + r = self.reverse + + # assign keys for new identifiers + + if not r.has_key(subject): + si=randid() + while f.has_key(si): + si=randid() + f[si] = subject + r[subject] = si + else: + si = r[subject] + + if not r.has_key(predicate): + pi=randid() + while f.has_key(pi): + pi=randid() + f[pi] = predicate + r[predicate] = pi + else: + pi = r[predicate] + + if not r.has_key(object): + oi=randid() + while f.has_key(oi): + oi=randid() + f[oi] = object + r[object] = oi + else: + oi = r[object] + + if not r.has_key(context): + ci=randid() + while f.has_key(ci): + ci=randid() + f[ci] = context + r[context] = ci + else: + ci = r[context] + + # add dictionary entries for cspo[c][s][p][o] = 1, + # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the + # nested {} where they do not yet exits. 
+ self._setNestedIndex(self.cspo, ci, si, pi, oi) + self._setNestedIndex(self.cpos, ci, pi, oi, si) + self._setNestedIndex(self.cosp, ci, oi, si, pi) + + if not quoted: + self._setNestedIndex(self.spo, si, pi, oi, ci) + self._setNestedIndex(self.pos, pi, oi, si, ci) + self._setNestedIndex(self.osp, oi, si, pi, ci) + + def _setNestedIndex(self, index, *keys): + for key in keys[:-1]: + if not index.has_key(key): + index[key] = self.createIndex() + index = index[key] + index[keys[-1]] = 1 + + + def _removeNestedIndex(self, index, *keys): + """ Remove context from the list of contexts in a nested index. + + Afterwards, recursively remove nested indexes when they became empty. + """ + parents = [] + for key in keys[:-1]: + parents.append(index) + index = index[key] + del index[keys[-1]] + + n = len(parents) + for i in xrange(n): + index = parents[n-1-i] + key = keys[n-1-i] + if len(index[key]) == 0: + del index[key] + + def remove(self, triple, context=None): + Store.remove(self, triple, context) + if context is not None: + if context == self: + context = None + + f = self.forward + r = self.reverse + if context is None: + for triple, cg in self.triples(triple): + subject, predicate, object = triple + si, pi, oi = self.identifierToInt((subject, predicate, object)) + contexts = list(self.contexts(triple)) + for context in contexts: + ci = r[context] + del self.cspo[ci][si][pi][oi] + del self.cpos[ci][pi][oi][si] + del self.cosp[ci][oi][si][pi] + + self._removeNestedIndex(self.spo, si, pi, oi, ci) + self._removeNestedIndex(self.pos, pi, oi, si, ci) + self._removeNestedIndex(self.osp, oi, si, pi, ci) + # grr!! hafta ref-count these before you can collect them dumbass! 
+ #del f[si], f[pi], f[oi] + #del r[subject], r[predicate], r[object] + else: + subject, predicate, object = triple + ci = r.get(context, None) + if ci: + for triple, cg in self.triples(triple, context): + si, pi, oi = self.identifierToInt(triple) + del self.cspo[ci][si][pi][oi] + del self.cpos[ci][pi][oi][si] + del self.cosp[ci][oi][si][pi] + + try: + self._removeNestedIndex(self.spo, si, pi, oi, ci) + self._removeNestedIndex(self.pos, pi, oi, si, ci) + self._removeNestedIndex(self.osp, oi, si, pi, ci) + except KeyError: + # the context may be a quoted one in which + # there will not be a triple in spo, pos or + # osp. So ignore any KeyErrors + pass + # TODO delete references to resources in self.forward/self.reverse + # that are not in use anymore... + + if subject is None and predicate is None and object is None: + # remove context + try: + ci = self.reverse[context] + del self.cspo[ci], self.cpos[ci], self.cosp[ci] + except KeyError: + # TODO: no exception when removing non-existant context? 
+ pass + + + def triples(self, triple, context=None): + """A generator over all the triples matching """ + + if context is not None: + if context == self: + context = None + + subject, predicate, object = triple + ci = si = pi = oi = Any + + if context is None: + spo = self.spo + pos = self.pos + osp = self.osp + else: + try: + ci = self.reverse[context] # TODO: Really ignore keyerror here + spo = self.cspo[ci] + pos = self.cpos[ci] + osp = self.cosp[ci] + except KeyError: + return + try: + if subject is not Any: + si = self.reverse[subject] # throws keyerror if subject doesn't exist ;( + if predicate is not Any: + pi = self.reverse[predicate] + if object is not Any: + oi = self.reverse[object] + except KeyError, e: + return #raise StopIteration + + if si != Any: # subject is given + if spo.has_key(si): + subjectDictionary = spo[si] + if pi != Any: # subject+predicate is given + if subjectDictionary.has_key(pi): + if oi!= Any: # subject+predicate+object is given + if subjectDictionary[pi].has_key(oi): + ss, pp, oo = self.intToIdentifier((si, pi, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in subjectDictionary[pi].keys(): + ss, pp, oo = self.intToIdentifier((si, pi, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in subjectDictionary.keys(): + if oi != Any: # object is given + if subjectDictionary[p].has_key(oi): + ss, pp, oo = self.intToIdentifier((si, p, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # object unbound + for o in subjectDictionary[p].keys(): + ss, pp, oo = self.intToIdentifier((si, p, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given subject not found + pass + elif pi != Any: # predicate is given, subject unbound + 
if pos.has_key(pi): + predicateDictionary = pos[pi] + if oi != Any: # predicate+object is given, subject unbound + if predicateDictionary.has_key(oi): + for s in predicateDictionary[oi].keys(): + ss, pp, oo = self.intToIdentifier((s, pi, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in predicateDictionary.keys(): + for s in predicateDictionary[o].keys(): + ss, pp, oo = self.intToIdentifier((s, pi, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + elif oi != Any: # object is given, subject+predicate unbound + if osp.has_key(oi): + objectDictionary = osp[oi] + for s in objectDictionary.keys(): + for p in objectDictionary[s].keys(): + ss, pp, oo = self.intToIdentifier((s, p, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # subject+predicate+object unbound + for s in spo.keys(): + subjectDictionary = spo[s] + for p in subjectDictionary.keys(): + for o in subjectDictionary[p].keys(): + ss, pp, oo = self.intToIdentifier((s, p, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + + def __len__(self, context=None): + + if context is not None: + if context == self: + context = None + + # TODO: for eff. 
implementation + count = 0 + for triple, cg in self.triples((Any, Any, Any), context): + count += 1 + return count + + def contexts(self, triple=None): + if triple: + si, pi, oi = self.identifierToInt(triple) + for ci in self.spo[si][pi][oi]: + yield self.forward[ci] + else: + for ci in self.cspo.keys(): + yield self.forward[ci] + + + + +import random + +def randid(randint=random.randint, choice=random.choice, signs=(-1,1)): + return choice(signs)*randint(1,2000000000) + +del random diff --git a/creactistore/_templates/lib/rdflib_/plugins/memory.py~ b/creactistore/_templates/lib/rdflib_/plugins/memory.py~ new file mode 100644 index 0000000..3a9d9f8 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/memory.py~ @@ -0,0 +1,563 @@ +from __future__ import generators +from rdflib.term import BNode +from rdflib.store import Store, NO_STORE, VALID_STORE + +__all__ = ['Memory', 'IOMemory'] + +ANY = Any = None + +class Memory(Store): + """\ + An in memory implementation of a triple store. + + This triple store uses nested dictionaries to store triples. Each + triple is stored in two such indices as follows spo[s][p][o] = 1 and + pos[p][o][s] = 1. + + Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser + """ + def __init__(self, configuration=None, identifier=None): + super(Memory, self).__init__(configuration) + self.identifier = identifier + + # indexed by [subject][predicate][object] + self.__spo = {} + + # indexed by [predicate][object][subject] + self.__pos = {} + + # indexed by [predicate][object][subject] + self.__osp = {} + + self.__namespace = {} + self.__prefix = {} + + def add(self, (subject, predicate, object), context, quoted=False): + """\ + Add a triple to the store of triples. + """ + # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] + # = 1, creating the nested dictionaries where they do not yet + # exits. 
+ spo = self.__spo + try: + po = spo[subject] + except: + po = spo[subject] = {} + try: + o = po[predicate] + except: + o = po[predicate] = {} + o[object] = 1 + + pos = self.__pos + try: + os = pos[predicate] + except: + os = pos[predicate] = {} + try: + s = os[object] + except: + s = os[object] = {} + s[subject] = 1 + + osp = self.__osp + try: + sp = osp[object] + except: + sp = osp[object] = {} + try: + p = sp[subject] + except: + p = sp[subject] = {} + p[predicate] = 1 + + def remove(self, (subject, predicate, object), context=None): + for (subject, predicate, object), c in self.triples( + (subject, predicate, object)): + del self.__spo[subject][predicate][object] + del self.__pos[predicate][object][subject] + del self.__osp[object][subject][predicate] + + def triples(self, (subject, predicate, object), context=None): + """A generator over all the triples matching """ + if subject!=ANY: # subject is given + spo = self.__spo + if subject in spo: + subjectDictionary = spo[subject] + if predicate!=ANY: # subject+predicate is given + if predicate in subjectDictionary: + if object!=ANY: # subject+predicate+object is given + if object in subjectDictionary[predicate]: + yield (subject, predicate, object), \ + self.__contexts() + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in subjectDictionary[predicate].keys(): + yield (subject, predicate, o), \ + self.__contexts() + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in subjectDictionary.keys(): + if object!=ANY: # object is given + if object in subjectDictionary[p]: + yield (subject, p, object), self.__contexts() + else: # given object not found + pass + else: # object unbound + for o in subjectDictionary[p].keys(): + yield (subject, p, o), self.__contexts() + else: # given subject not found + pass + elif predicate!=ANY: # predicate is given, subject unbound + pos = self.__pos + if predicate in pos: + predicateDictionary = 
pos[predicate] + if object!=ANY: # predicate+object is given, subject unbound + if object in predicateDictionary: + for s in predicateDictionary[object].keys(): + yield (s, predicate, object), self.__contexts() + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in predicateDictionary.keys(): + for s in predicateDictionary[o].keys(): + yield (s, predicate, o), self.__contexts() + elif object!=ANY: # object is given, subject+predicate unbound + osp = self.__osp + if object in osp: + objectDictionary = osp[object] + for s in objectDictionary.keys(): + for p in objectDictionary[s].keys(): + yield (s, p, object), self.__contexts() + else: # subject+predicate+object unbound + spo = self.__spo + for s in spo.keys(): + subjectDictionary = spo[s] + for p in subjectDictionary.keys(): + for o in subjectDictionary[p].keys(): + yield (s, p, o), self.__contexts() + + def __len__(self, context=None): + #@@ optimize + i = 0 + for triple in self.triples((None, None, None)): + i += 1 + return i + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.iteritems(): + yield prefix, namespace + + def __contexts(self): + return (c for c in []) # TODO: best way to return empty generator + +class IOMemory(Store): + """\ + An integer-key-optimized-context-aware-in-memory store. + + Uses nested dictionaries to store triples and context. Each triple + is stored in six such indices as follows cspo[c][s][p][o] = 1 + and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as + spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c] + + Context information is used to track the 'source' of the triple + data for merging, unmerging, remerging purposes. 
context aware + store stores consume more memory size than non context stores. + + """ + + context_aware = True + formula_aware = True + + def __init__(self, configuration=None, identifier=None): + super(IOMemory, self).__init__() + + # indexed by [context][subject][predicate][object] = 1 + self.cspo = self.createIndex() + + # indexed by [context][predicate][object][subject] = 1 + self.cpos = self.createIndex() + + # indexed by [context][object][subject][predicate] = 1 + self.cosp = self.createIndex() + + # indexed by [subject][predicate][object] = [context] + self.spo = self.createIndex() + + # indexed by [predicate][object][subject] = [context] + self.pos = self.createIndex() + + # indexed by [object][subject][predicate] = [context] + self.osp = self.createIndex() + + # indexes integer keys to identifiers + self.forward = self.createForward() + + # reverse index of forward + self.reverse = self.createReverse() + + self.identifier = identifier or BNode() + + self.__namespace = self.createPrefixMap() + self.__prefix = self.createPrefixMap() + + def open(self, configuration, create=False): + if not create: + # An IOMemory Store never exists. + return NO_STORE + else: + return VALID_STORE + + def bind(self, prefix, namespace): + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + return self.__namespace.get(prefix, None) + + def prefix(self, namespace): + return self.__prefix.get(namespace, None) + + def namespaces(self): + for prefix, namespace in self.__namespace.iteritems(): + yield prefix, namespace + + def defaultContext(self): + return self.default_context + + def addContext(self, context): + """ Add context w/o adding statement. Dan you can remove this if you want """ + + if not self.reverse.has_key(context): + ci=randid() + while not self.forward.insert(ci, context): + ci=randid() + self.reverse[context] = ci + + def intToIdentifier(self, (si, pi, oi)): + """ Resolve an integer triple into identifers. 
""" + return (self.forward[si], self.forward[pi], self.forward[oi]) + + def identifierToInt(self, (s, p, o)): + """ Resolve an identifier triple into integers. """ + return (self.reverse[s], self.reverse[p], self.reverse[o]) + + def uniqueSubjects(self, context=None): + if context is None: + index = self.spo + else: + index = self.cspo[context] + for si in index.keys(): + yield self.forward[si] + + def uniquePredicates(self, context=None): + if context is None: + index = self.pos + else: + index = self.cpos[context] + for pi in index.keys(): + yield self.forward[pi] + + def uniqueObjects(self, context=None): + if context is None: + index = self.osp + else: + index = self.cosp[context] + for oi in index.keys(): + yield self.forward[oi] + + def createForward(self): + return {} + + def createReverse(self): + return {} + + def createIndex(self): + return {} + + def createPrefixMap(self): + return {} + + def add(self, triple, context, quoted=False): + """\ + Add a triple to the store. + """ + Store.add(self, triple, context, quoted) + for triple, cg in self.triples(triple, context): + #triple is already in the store. + return + + subject, predicate, object = triple + + f = self.forward + r = self.reverse + + # assign keys for new identifiers + + if not r.has_key(subject): + si=randid() + while f.has_key(si): + si=randid() + f[si] = subject + r[subject] = si + else: + si = r[subject] + + if not r.has_key(predicate): + pi=randid() + while f.has_key(pi): + pi=randid() + f[pi] = predicate + r[predicate] = pi + else: + pi = r[predicate] + + if not r.has_key(object): + oi=randid() + while f.has_key(oi): + oi=randid() + f[oi] = object + r[object] = oi + else: + oi = r[object] + + if not r.has_key(context): + ci=randid() + while f.has_key(ci): + ci=randid() + f[ci] = context + r[context] = ci + else: + ci = r[context] + + # add dictionary entries for cspo[c][s][p][o] = 1, + # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the + # nested {} where they do not yet exits. 
+ self._setNestedIndex(self.cspo, ci, si, pi, oi) + self._setNestedIndex(self.cpos, ci, pi, oi, si) + self._setNestedIndex(self.cosp, ci, oi, si, pi) + + if not quoted: + self._setNestedIndex(self.spo, si, pi, oi, ci) + self._setNestedIndex(self.pos, pi, oi, si, ci) + self._setNestedIndex(self.osp, oi, si, pi, ci) + + def _setNestedIndex(self, index, *keys): + for key in keys[:-1]: + if not index.has_key(key): + index[key] = self.createIndex() + index = index[key] + index[keys[-1]] = 1 + + + def _removeNestedIndex(self, index, *keys): + """ Remove context from the list of contexts in a nested index. + + Afterwards, recursively remove nested indexes when they became empty. + """ + parents = [] + for key in keys[:-1]: + parents.append(index) + index = index[key] + del index[keys[-1]] + + n = len(parents) + for i in xrange(n): + index = parents[n-1-i] + key = keys[n-1-i] + if len(index[key]) == 0: + del index[key] + + def remove(self, triple, context=None): + Store.remove(self, triple, context) + if context is not None: + if context == self: + context = None + + f = self.forward + r = self.reverse + if context is None: + for triple, cg in self.triples(triple): + subject, predicate, object = triple + si, pi, oi = self.identifierToInt((subject, predicate, object)) + contexts = list(self.contexts(triple)) + for context in contexts: + ci = r[context] + del self.cspo[ci][si][pi][oi] + del self.cpos[ci][pi][oi][si] + del self.cosp[ci][oi][si][pi] + + self._removeNestedIndex(self.spo, si, pi, oi, ci) + self._removeNestedIndex(self.pos, pi, oi, si, ci) + self._removeNestedIndex(self.osp, oi, si, pi, ci) + # grr!! hafta ref-count these before you can collect them dumbass! 
+ #del f[si], f[pi], f[oi] + #del r[subject], r[predicate], r[object] + else: + subject, predicate, object = triple + ci = r.get(context, None) + if ci: + for triple, cg in self.triples(triple, context): + si, pi, oi = self.identifierToInt(triple) + del self.cspo[ci][si][pi][oi] + del self.cpos[ci][pi][oi][si] + del self.cosp[ci][oi][si][pi] + + try: + self._removeNestedIndex(self.spo, si, pi, oi, ci) + self._removeNestedIndex(self.pos, pi, oi, si, ci) + self._removeNestedIndex(self.osp, oi, si, pi, ci) + except KeyError: + # the context may be a quoted one in which + # there will not be a triple in spo, pos or + # osp. So ignore any KeyErrors + pass + # TODO delete references to resources in self.forward/self.reverse + # that are not in use anymore... + + if subject is None and predicate is None and object is None: + # remove context + try: + ci = self.reverse[context] + del self.cspo[ci], self.cpos[ci], self.cosp[ci] + except KeyError: + # TODO: no exception when removing non-existant context? 
+ pass + + + def triples(self, triple, context=None): + """A generator over all the triples matching """ + + if context is not None: + if context == self: + context = None + + subject, predicate, object = triple + ci = si = pi = oi = Any + + if context is None: + spo = self.spo + pos = self.pos + osp = self.osp + else: + try: + ci = self.reverse[context] # TODO: Really ignore keyerror here + spo = self.cspo[ci] + pos = self.cpos[ci] + osp = self.cosp[ci] + except KeyError: + return + try: + if subject is not Any: + si = self.reverse[subject] # throws keyerror if subject doesn't exist ;( + if predicate is not Any: + pi = self.reverse[predicate] + if object is not Any: + oi = self.reverse[object] + except KeyError, e: + return #raise StopIteration + + if si != Any: # subject is given + if spo.has_key(si): + subjectDictionary = spo[si] + if pi != Any: # subject+predicate is given + if subjectDictionary.has_key(pi): + if oi!= Any: # subject+predicate+object is given + if subjectDictionary[pi].has_key(oi): + ss, pp, oo = self.intToIdentifier((si, pi, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # subject+predicate is given, object unbound + for o in subjectDictionary[pi].keys(): + ss, pp, oo = self.intToIdentifier((si, pi, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given predicate not found + pass + else: # subject given, predicate unbound + for p in subjectDictionary.keys(): + if oi != Any: # object is given + if subjectDictionary[p].has_key(oi): + ss, pp, oo = self.intToIdentifier((si, p, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # object unbound + for o in subjectDictionary[p].keys(): + ss, pp, oo = self.intToIdentifier((si, p, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given subject not found + pass + elif pi != Any: # predicate is given, subject unbound + 
if pos.has_key(pi): + predicateDictionary = pos[pi] + if oi != Any: # predicate+object is given, subject unbound + if predicateDictionary.has_key(oi): + for s in predicateDictionary[oi].keys(): + ss, pp, oo = self.intToIdentifier((s, pi, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # given object not found + pass + else: # predicate is given, object+subject unbound + for o in predicateDictionary.keys(): + for s in predicateDictionary[o].keys(): + ss, pp, oo = self.intToIdentifier((s, pi, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + elif oi != Any: # object is given, subject+predicate unbound + if osp.has_key(oi): + objectDictionary = osp[oi] + for s in objectDictionary.keys(): + for p in objectDictionary[s].keys(): + ss, pp, oo = self.intToIdentifier((s, p, oi)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + else: # subject+predicate+object unbound + for s in spo.keys(): + subjectDictionary = spo[s] + for p in subjectDictionary.keys(): + for o in subjectDictionary[p].keys(): + ss, pp, oo = self.intToIdentifier((s, p, o)) + yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) + + def __len__(self, context=None): + + if context is not None: + if context == self: + context = None + + # TODO: for eff. 
implementation + count = 0 + for triple, cg in self.triples((Any, Any, Any), context): + count += 1 + return count + + def contexts(self, triple=None): + if triple: + si, pi, oi = self.identifierToInt(triple) + for ci in self.spo[si][pi][oi]: + yield self.forward[ci] + else: + for ci in self.cspo.keys(): + yield self.forward[ci] + + + + +import random + +def randid(randint=random.randint, choice=random.choice, signs=(-1,1)): + return choice(signs)*randint(1,2000000000) + +del random diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py new file mode 100644 index 0000000..8062daa --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/__init__.py @@ -0,0 +1,3 @@ +""" + +""" diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py new file mode 100644 index 0000000..32da08e --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py @@ -0,0 +1,2314 @@ +#!/usr/bin/env python +u""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine + +Authors of the original suite: + +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. <reagle@w3.org> +* Rich Salz <rsalz@zolera.com> + +http://www.w3.org/2000/10/swap/notation3.py + +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. + +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software + +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. \u32E1 + +Modified to work with rdflib_ by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. 
Grimnes + +""" + +# Python standard libraries +import types +import sys +import os +import string +import re +import time +import StringIO +import codecs + +from binascii import a2b_hex +from decimal import Decimal + +from rdflib_.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id +from rdflib_.graph import QuotedGraph, ConjunctiveGraph +from rdflib_ import py3compat +b = py3compat.b + +__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"] + +from rdflib_.parser import Parser + +# Incestuous.. would be nice to separate N3 and XML +# from sax2rdf import XMLtoDOM +def XMLtoDOM(*args, **kargs): + # print >> sys.stderr, args, kargs + pass + +# SWAP http://www.w3.org/2000/10/swap +# from diag import verbosity, setVerbosity, progress +def verbosity(*args, **kargs): + # print >> sys.stderr, args, kargs + pass +def setVerbosity(*args, **kargs): + # print >> sys.stderr, args, kargs + pass +def progress(*args, **kargs): + # print >> sys.stderr, args, kargs + pass + + + +def splitFrag(uriref): + """split a URI reference between the fragment and the rest. + + Punctuation is thrown away. + + e.g. + + >>> splitFrag("abc#def") + ('abc', 'def') + + >>> splitFrag("abcdef") + ('abcdef', None) + + """ + + i = uriref.rfind("#") + if i>= 0: return uriref[:i], uriref[i+1:] + else: return uriref, None + +def splitFragP(uriref, punct=0): + """split a URI reference before the fragment + + Punctuation is kept. + + e.g. 
+ + >>> splitFragP("abc#def") + ('abc', '#def') + + >>> splitFragP("abcdef") + ('abcdef', '') + + """ + + i = uriref.rfind("#") + if i>= 0: return uriref[:i], uriref[i:] + else: return uriref, '' + +@py3compat.format_doctest_out +def join(here, there): + """join an absolute URI and URI reference + (non-ascii characters are supported/doctested; + haven't checked the details of the IRI spec though) + + here is assumed to be absolute. + there is URI reference. + + >>> join('http://example/x/y/z', '../abc') + 'http://example/x/abc' + + Raise ValueError if there uses relative path + syntax but here has no hierarchical path. + + >>> join('mid:foo@example', '../foo') + Traceback (most recent call last): + raise ValueError, here + ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'. + + >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + + We grok IRIs + + >>> len(u'Andr\\xe9') + 5 + + >>> join('http://example.org/', u'#Andr\\xe9') + %(u)s'http://example.org/#Andr\\xe9' + """ + + assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?) + + slashl = there.find('/') + colonl = there.find(':') + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(':') + assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute + + path, frag = splitFragP(there) + if not path: return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl+1:bcolonl+2] <> '/': + raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." 
%(here, there)) + + if here[bcolonl+1:bcolonl+3] == '//': + bpath = here.find('/', bcolonl+3) + else: + bpath = bcolonl+1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + '/' + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == '//': + return here[:bcolonl+1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == '/': + return here[:bpath] + there + + slashr = here.rfind('/') + + while 1: + if path[:2] == './': + path = path[2:] + if path == '.': + path = '' + elif path[:3] == '../' or path == '..': + path = path[3:] + i = here.rfind('/', bpath, slashr) + if i >= 0: + here = here[:i+1] + slashr = i + else: + break + + return here[:slashr+1] + path + frag + +commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$') + +def refTo(base, uri): + """figure out a relative URI reference from base to uri + + >>> refTo('http://example/x/y/z', 'http://example/x/abc') + '../abc' + + >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s') + 'q/r#s' + + >>> refTo(None, 'http://ex/x/y') + 'http://ex/x/y' + + >>> refTo('http://ex/x/y', 'http://ex/x/y') + '' + + Note the relationship between refTo and join: + join(x, refTo(x, y)) == y + which points out certain strings which cannot be URIs. e.g. + >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y + 0 + + So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead: + >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y + 1 + + This one checks that it uses a root-realtive one where that is + all they share. Now uses root-relative where no path is shared. + This is a matter of taste but tends to give more resilience IMHO + -- and shorter paths + + Note that base may be None, meaning no base. In some situations, there + just ain't a base. Slife. In these cases, relTo returns the absolute value. + The axiom abs(,rel(b,x))=x still holds. + This saves people having to set the base to "bogus:". 
+ + >>> refTo('http://ex/x/y/z', 'http://ex/r') + '/r' + + """ + +# assert base # don't mask bugs -danc # not a bug. -tim + if not base: return uri + if base == uri: return "" + + # Find how many path segments in common + i=0 + while i<len(uri) and i<len(base): + if uri[i] == base[i]: i = i + 1 + else: break + # print "# relative", base, uri, " same up to ", i + # i point to end of shortest one or first difference + + m = commonHost.match(base[:i]) + if m: + k=uri.find("//") + if k<0: k=-2 # no host + l=uri.find("/", k+2) + if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]: + return uri[l:] + + if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base + + while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash + + if i < 3: return uri # No way. + if base.find("//", i-2)>0 \ + or uri.find("//", i-2)>0: return uri # An unshared "//" + if base.find(":", i)>0: return uri # An unshared ":" + n = base.count("/", i) + if n == 0 and i<len(uri) and uri[i] == '#': + return "./" + uri[i:] + elif n == 0 and i == len(uri): + return "./" + else: + return ("../" * n) + uri[i:] + + +def base(): + """The base URI for this process - the Web equiv of cwd + + Relative or abolute unix-standard filenames parsed relative to + this yeild the URI of the file. + If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + + """ +# return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(str): + """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = str + for i in range(len(s)): + if s[i] == "\\": s = s[:i] + "/" + s[i+1:] + if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present + return s + +URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~") + # unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" + +@py3compat.format_doctest_out +def canonical(str_in): + """Convert equivalent URIs (or parts) to the same string + + There are many differenet levels of URI canonicalization + which are possible. See http://www.ietf.org/rfc/rfc3986.txt + Done: + - Converfting unicode IRI to utf-8 + - Escaping all non-ASCII + - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39), + hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4) + - Making all escapes uppercase hexadecimal + + Not done: + - Making URI scheme lowercase + - changing /./ or /foo/../ to / with care not to change host part + + + >>> canonical("foo bar") + %(b)s'foo%%20bar' + + >>> canonical(u'http:') + %(b)s'http:' + + >>> canonical('fran%%c3%%83%%c2%%a7ois') + %(b)s'fran%%C3%%83%%C2%%A7ois' + + >>> canonical('a') + %(b)s'a' + + >>> canonical('%%4e') + %(b)s'N' + + >>> canonical('%%9d') + %(b)s'%%9D' + + >>> canonical('%%2f') + %(b)s'%%2F' + + >>> canonical('%%2F') + %(b)s'%%2F' + + """ + if type(str_in) == type(u''): + s8 = str_in.encode('utf-8') + else: + s8 = str_in + s = b('') + i = 0 + while i < len(s8): + if py3compat.PY3: + n = s8[i]; ch = bytes([n]) + else: + ch = s8[i]; n = ord(ch) + if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8 + s += b("%%%02X" % ord(ch)) + elif ch == b('%') and i+2 < len(s8): + ch2 = a2b_hex(s8[i+1:i+3]) + if ch2 in URI_unreserved: s += ch2 + else: s += b("%%%02X" % ord(ch2)) + i = i+3 + continue + else: + s += ch + i = i +1 + return s + + + + + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = 
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS+"sameAs" +parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + + +runNamespaceValue = None + +def runNamespace(): + "Return a URI suitable as a namespace for run-local objects" + # @@@ include hostname (privacy?) (hash it?) + global runNamespaceValue + if runNamespaceValue == None: + runNamespaceValue = join(base(), _unique_id()) + '#' + return runNamespaceValue + +nextu = 0 +def uniqueURI(): + "A unique URI" + global nextu + nextu += 1 + return runNamespace() + "u_" + `nextu` + +class URISyntaxError(ValueError): + """A parameter is passed to a routine that requires a URI reference""" + pass + + +tracking = False +chatty_flag = 50 + + +from xml.dom import Node +try: + from xml.ns import XMLNS +except: + class XMLNS: + BASE = "http://www.w3.org/2000/xmlns/" + XML = "http://www.w3.org/XML/1998/namespace" + + +_attrs = lambda E: (E.attributes and E.attributes.values()) or [] +_children = lambda E: E.childNodes or [] +_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML +_inclusive = lambda n: n.unsuppressedPrefixes == None + +# Does a document/PI has lesser/greater document order than the +# first element? 
+_LesserElement, _Element, _GreaterElement = range(3) + +def _sorter(n1,n2): + '''_sorter(n1,n2) -> int + Sorting predicate for non-NS attributes.''' + + i = cmp(n1.namespaceURI, n2.namespaceURI) + if i: return i + return cmp(n1.localName, n2.localName) + + +def _sorter_ns(n1,n2): + '''_sorter_ns((n,v),(n,v)) -> int + "(an empty namespace URI is lexicographically least)."''' + + if n1[0] == 'xmlns': return -1 + if n2[0] == 'xmlns': return 1 + return cmp(n1[0], n2[0]) + +def _utilized(n, node, other_attrs, unsuppressedPrefixes): + '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean + Return true if that nodespace is utilized within the node''' + + if n.startswith('xmlns:'): + n = n[6:] + elif n.startswith('xmlns'): + n = n[5:] + if (n=="" and node.prefix in ["#default", None]) or \ + n == node.prefix or n in unsuppressedPrefixes: + return 1 + for attr in other_attrs: + if n == attr.prefix: return 1 + return 0 + +#_in_subset = lambda subset, node: not subset or node in subset +_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak + +class _implementation: + '''Implementation class for C14N. This accompanies a node during it's + processing and includes the parameters and processing state.''' + + # Handler for each node type; populated during module instantiation. + handlers = {} + + def __init__(self, node, write, **kw): + '''Create and run the implementation.''' + self.write = write + self.subset = kw.get('subset') + self.comments = kw.get('comments', 0) + self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes') + nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE }) + + # Processing state. 
+ self.state = (nsdict, {'xml':''}, {}) #0422 + + if node.nodeType == Node.DOCUMENT_NODE: + self._do_document(node) + elif node.nodeType == Node.ELEMENT_NODE: + self.documentOrder = _Element # At document element + if not _inclusive(self): + self._do_element(node) + else: + inherited = self._inherit_context(node) + self._do_element(node, inherited) + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + pass + elif node.nodeType == Node.TEXT_NODE: + self._do_text(node) + else: + raise TypeError, str(node) + + + def _inherit_context(self, node): + '''_inherit_context(self, node) -> list + Scan ancestors of attribute and namespace context. Used only + for single element node canonicalization, not for subset + canonicalization.''' + + # Collect the initial list of xml:foo attributes. + xmlattrs = filter(_IN_XML_NS, _attrs(node)) + + # Walk up and get all xml:XXX attributes we inherit. + inherited, parent = [], node.parentNode + while parent and parent.nodeType == Node.ELEMENT_NODE: + for a in filter(_IN_XML_NS, _attrs(parent)): + n = a.localName + if n not in xmlattrs: + xmlattrs.append(n) + inherited.append(a) + parent = parent.parentNode + return inherited + + + def _do_document(self, node): + '''_do_document(self, node) -> None + Process a document node. 
documentOrder holds whether the document + element has been encountered such that PIs/comments can be written + as specified.''' + + self.documentOrder = _LesserElement + for child in node.childNodes: + if child.nodeType == Node.ELEMENT_NODE: + self.documentOrder = _Element # At document element + self._do_element(child) + self.documentOrder = _GreaterElement # After document element + elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE: + self._do_pi(child) + elif child.nodeType == Node.COMMENT_NODE: + self._do_comment(child) + elif child.nodeType == Node.DOCUMENT_TYPE_NODE: + pass + else: + raise TypeError, str(child) + handlers[Node.DOCUMENT_NODE] = _do_document + + + def _do_text(self, node): + '''_do_text(self, node) -> None + Process a text or CDATA node. Render various special characters + as their C14N entity representations.''' + if not _in_subset(self.subset, node): return + s = node.data.replace("&", "&") + s = s.replace("<", "<") + s = s.replace(">", ">") + s = s.replace("\015", "
") + if s: self.write(s) + handlers[Node.TEXT_NODE] = _do_text + handlers[Node.CDATA_SECTION_NODE] = _do_text + + + def _do_pi(self, node): + '''_do_pi(self, node) -> None + Process a PI node. Render a leading or trailing #xA if the + document order of the PI is greater or lesser (respectively) + than the document element. + ''' + if not _in_subset(self.subset, node): return + W = self.write + if self.documentOrder == _GreaterElement: W('\n') + W('<?') + W(node.nodeName) + s = node.data + if s: + W(' ') + W(s) + W('?>') + if self.documentOrder == _LesserElement: W('\n') + handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi + + + def _do_comment(self, node): + '''_do_comment(self, node) -> None + Process a comment node. Render a leading or trailing #xA if the + document order of the comment is greater or lesser (respectively) + than the document element. + ''' + if not _in_subset(self.subset, node): return + if self.comments: + W = self.write + if self.documentOrder == _GreaterElement: W('\n') + W('<!--') + W(node.data) + W('-->') + if self.documentOrder == _LesserElement: W('\n') + handlers[Node.COMMENT_NODE] = _do_comment + + + def _do_attr(self, n, value): + ''''_do_attr(self, node) -> None + Process an attribute.''' + + W = self.write + W(' ') + W(n) + W('="') + s = value.replace(value, "&", "&") + s = s.replace("<", "<") + s = s.replace('"', '"') + s = s.replace('\011', '	') + s = s.replace('\012', '
&#xA;') + s = s.replace('\015', '&#xD;
') + W(s) + W('"') + + + def _do_element(self, node, initial_other_attrs = []): + '''_do_element(self, node, initial_other_attrs = []) -> None + Process an element (and its children).''' + + # Get state (from the stack) make local copies. + # ns_parent -- NS declarations in parent + # ns_rendered -- NS nodes rendered by ancestors + # ns_local -- NS declarations relevant to this element + # xml_attrs -- Attributes in XML namespace from parent + # xml_attrs_local -- Local attributes in XML namespace. + ns_parent, ns_rendered, xml_attrs = \ + self.state[0], self.state[1].copy(), self.state[2].copy() #0422 + ns_local = ns_parent.copy() + xml_attrs_local = {} + + # progress("_do_element node.nodeName=", node.nodeName) + # progress("_do_element node.namespaceURI", node.namespaceURI) + # progress("_do_element node.tocml()", node.toxml()) + # Divide attributes into NS, XML, and others. + other_attrs = initial_other_attrs[:] + in_subset = _in_subset(self.subset, node) + for a in _attrs(node): + # progress("\t_do_element a.nodeName=", a.nodeName) + if a.namespaceURI == XMLNS.BASE: + n = a.nodeName + if n == "xmlns:": n = "xmlns" # DOM bug workaround + ns_local[n] = a.nodeValue + elif a.namespaceURI == XMLNS.XML: + if _inclusive(self) or in_subset: + xml_attrs_local[a.nodeName] = a #0426 + else: + other_attrs.append(a) + #add local xml:foo attributes to ancestor's xml:foo attributes + xml_attrs.update(xml_attrs_local) + + # Render the node + W, name = self.write, None + if in_subset: + name = node.nodeName + W('<') + W(name) + + # Create list of NS attributes to render. + ns_to_render = [] + for n,v in ns_local.items(): + + # If default namespace is XMLNS.BASE or empty, + # and if an ancestor was the same + if n == "xmlns" and v in [ XMLNS.BASE, '' ] \ + and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]: + continue + + # "omit namespace node with local name xml, which defines + # the xml prefix, if its string value is + # http://www.w3.org/XML/1998/namespace." 
+ if n in ["xmlns:xml", "xml"] \ + and v in [ 'http://www.w3.org/XML/1998/namespace' ]: + continue + + + # If not previously rendered + # and it's inclusive or utilized + if (n,v) not in ns_rendered.items() \ + and (_inclusive(self) or \ + _utilized(n, node, other_attrs, self.unsuppressedPrefixes)): + ns_to_render.append((n, v)) + + # Sort and render the ns, marking what was rendered. + ns_to_render.sort(_sorter_ns) + for n,v in ns_to_render: + self._do_attr(n, v) + ns_rendered[n]=v #0417 + + # If exclusive or the parent is in the subset, add the local xml attributes + # Else, add all local and ancestor xml attributes + # Sort and render the attributes. + if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426 + other_attrs.extend(xml_attrs_local.values()) + else: + other_attrs.extend(xml_attrs.values()) + other_attrs.sort(_sorter) + for a in other_attrs: + self._do_attr(a.nodeName, a.value) + W('>') + + # Push state, recurse, pop state. + state, self.state = self.state, (ns_local, ns_rendered, xml_attrs) + for c in _children(node): + _implementation.handlers[c.nodeType](self, c) + self.state = state + + if name: W('</%s>' % name) + handlers[Node.ELEMENT_NODE] = _do_element + + +def Canonicalize(node, output=None, **kw): + '''Canonicalize(node, output=None, **kw) -> UTF-8 + + Canonicalize a DOM document/element node and all descendents. + Return the text; if output is specified then output.write will + be called to output the text and None will be returned + Keyword parameters: + nsdict -- a dictionary of prefix:uri namespace entries + assumed to exist in the surrounding context + comments -- keep comments if non-zero (default is 0) + subset -- Canonical XML subsetting resulting from XPath (default is []) + unsuppressedPrefixes -- do exclusive C14N, and this specifies the + prefixes that should be inherited. 
+ ''' + if output: + apply(_implementation, (node, output.write), kw) + else: + s = StringIO.StringIO() + apply(_implementation, (node, s.write), kw) + return s.getvalue() + +# end of xmlC14n.py + +# from why import BecauseOfData, becauseSubexpression +def BecauseOfData(*args, **kargs): + # print args, kargs + pass +def becauseSubexpression(*args, **kargs): + # print args, kargs + pass + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + + + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = ( SYMBOL , RDF_type_URI ) +DAML_sameAs = ( SYMBOL, DAML_sameAs_URI ) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. The XML spec switched to assuming unknown things were name +# characaters. +# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ +_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ +_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + + +N3CommentCharacter = "#" # For unix script #! compatabilty + +########################################## Parse string to sink +# +# Regular expressions: +eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment +eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. 
w/comment +ws = re.compile(r'[ \t]*') # Whitespace not including NL +signed_integer = re.compile(r'[-+]?[0-9]+') # integer +number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?') +digitstring = re.compile(r'[0-9]+') # Unsigned integer +interesting = re.compile(r'[\\\r\n\"]') +langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?') +#" + + + +class SinkParser: + def __init__(self, store, openFormula=None, thisDoc="", baseURI=None, + genPrefix = "", flags="", + why=None): + """ note: namespace names should *not* end in #; + the # will get added during qname processing """ + + self._bindings = {} + self._flags = flags + if thisDoc != "": + assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: store.setGenPrefix(genPrefix) # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ] + self.keywordsSet = 0 # Then only can others be considerd qnames + self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term + self._variables = {} + self._parentVariables = {} + self._reason = why # Why the parser was asked to parse this + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason) + + if baseURI: self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ':' in self._baseURI + + if not self._genPrefix: + if self._thisDoc: self._genPrefix = self._thisDoc + "#_g" + else: self._genPrefix = uniqueURI() + + if openFormula ==None: + if self._thisDoc: + self._formula = store.newFormula(thisDoc + "#_formula") + else: + self._formula = store.newFormula() + else: + 
self._formula = openFormula + + + self._context = self._formula + self._parentContext = None + + + def here(self, i): + """String generated from position in file + + This is for repeatability when refering people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly _L1C1. + It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix , self.lines, + i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream): + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + + def feed(self, octets): + """Feed an octet stream tothe parser + + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. 
+ So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, unicode): + s = octets.decode('utf-8') + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'): + s = s[1:] + else: + s=octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j<0: return + + i = self.directiveOrStatement(s,j) + if i<0: + print "# next char: ", `s[j]` + raise BadSyntax(self._thisDoc, self.lines, s, j, + "expected directive or statement") + + def directiveOrStatement(self, str,h): + + i = self.skipSpace(str, h) + if i<0: return i # EOF + + j = self.directive(str, i) + if j>=0: return self.checkDot(str,j) + + j = self.statement(str, i) + if j>=0: return self.checkDot(str,j) + + return j + + + #@@I18N + global _notNameChars + #_namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok, str, i): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file.""" + + assert tok[0] not in _notNameChars # not for punctuation + if str[i:i+1] == "@": + i = i+1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + if (str[i:i+len(tok)] == tok + and (str[i+len(tok)] in _notQNameChars )): + i = i + len(tok) + return i + else: + return -1 + + def directive(self, str, i): + j = self.skipSpace(str, i) + if j<0: return j # eof + res = [] + + j = self.tok('bind', str, i) # implied "#". Obsolete. 
+ if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "keyword bind is obsolete: use @prefix") + + j = self.tok('keywords', str, i) + if j>0: + i = self.commaSeparatedList(str, j, res, self.bareWord) + if i < 0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "'@keywords' needs comma separated list of words") + self.setKeywords(res[:]) + # was: diag.chatty_flag + if chatty_flag > 80: progress("Keywords ", self.keywords) + return i + + + j = self.tok('forAll', str, i) + if j > 0: + i = self.commaSeparatedList(str, j, res, self.uri_ref2) + if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "Bad variable list after @forAll") + for x in res: + #self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok('forSome', str, i) + if j > 0: + i = self. commaSeparatedList(str, j, res, self.uri_ref2) + if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + + j=self.tok('prefix', str, i) # no implied "#" + if j>=0: + t = [] + i = self.qname(str, j, t) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected qname after @prefix") + j = self.uri_ref2(str, i, t) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected <uriref> after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + raise BadSyntax(self._thisDoc, self.lines, str, j, + "With no base URI, cannot use relative URI in @prefix <"+ns+">") + assert ':' in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j=self.tok('base', str, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(str, j, t) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected <uri> after @base ") + ns = self.uriOf(t[0]) + + if 
self._baseURI: + ns = join(self._baseURI, ns) + else: + raise BadSyntax(self._thisDoc, self.lines, str, j, + "With no previous base URI, cannot use relative URI in @base <"+ns+">") + assert ':' in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def bind(self, qn, uri): + assert isinstance(uri, + types.StringType), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k): + "Takes a list of strings" + if k == None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + + def startDoc(self): + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self): + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + #$$$$$$$$$$$$$$$$$$$$$ +# print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + + + def statement(self, str, i): + r = [] + + i = self.object(str, i, r) # Allow literal for subject - extends RDF + if i<0: return i + + j = self.property_list(str, i, r[0]) + + if j<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected propertylist") + return j + + def subject(self, str, i, res): + return self.item(str, i, res) + + def verb(self, str, i, res): + """ has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(str, i) + if j<0:return j # eof + + r = [] + + j = self.tok('has', str, i) + if j>=0: + i = self.prop(str, j, r) + if i < 0: raise BadSyntax(self._thisDoc, self.lines, + str, j, "expected property after 'has'") + res.append(('->', r[0])) + return i + + j = self.tok('is', str, i) + if j>=0: + i = self.prop(str, j, r) + if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected <property> after 
'is'") + j = self.skipSpace(str, i) + if j<0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "End of file found, expected property after 'is'") + return j # eof + i=j + j = self.tok('of', str, i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected 'of' after 'is' <prop>") + res.append(('<-', r[0])) + return j + + j = self.tok('a', str, i) + if j>=0: + res.append(('->', RDF_type)) + return j + + + if str[i:i+2] == "<=": + res.append(('<-', self._store.newSymbol(Logic_NS+"implies"))) + return i+2 + + if str[i:i+1] == "=": + if str[i+1:i+2] == ">": + res.append(('->', self._store.newSymbol(Logic_NS+"implies"))) + return i+2 + res.append(('->', DAML_sameAs)) + return i+1 + + if str[i:i+2] == ":=": + # patch file relates two formulae, uses this @@ really? + res.append(('->', Logic_NS+"becomes")) + return i+2 + + j = self.prop(str, i, r) + if j >= 0: + res.append(('->', r[0])) + return j + + if str[i:i+2] == ">-" or str[i:i+2] == "<-": + raise BadSyntax(self._thisDoc, self.lines, str, j, + ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, str, i, res): + return self.item(str, i, res) + + def item(self, str, i, res): + return self.path(str, i, res) + + def blankNode(self, uri=None): + if "B" not in self._flags: + return self._context.newBlankNode(uri, why=self._reason2) + x = self._context.newSymbol(uri) + self._context.declareExistential(x) + return x + + def path(self, str, i, res): + """Parse the path production. + """ + j = self.nodeOrLiteral(str, i, res) + if j<0: return j # nope + + while str[j:j+1] in "!^.": # no spaces, must follow exactly (?) + ch = str[j:j+1] # @@ Allow "." followed IMMEDIATELY by a node. 
+ if ch == ".": + ahead = str[j+1:j+2] + if not ahead or (ahead in _notNameChars + and ahead not in ":?<[{("): break + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(str, j+1, res) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln): + """Remember or generate a term for one of these _: anonymous nodes""" + term = self._anonymousNodes.get(ln, None) + if term != None: return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, str, i, res, subjectAlready=None): + """Parse the <node> production. + Space is now skipped once at the beginning + instead of in multipe calls to self.skipSpace(). + """ + subj = subjectAlready + + j = self.skipSpace(str,i) + if j<0: return j #eof + i=j + ch = str[i:i+1] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j=self.skipSpace(str,i+1) + if j<0: raise BadSyntax(self._thisDoc, + self.lines, str, i, "EOF after '['") + if str[j:j+1] == "=": # Hack for "is" binding name to anon node + i = j+1 + objs = [] + j = self.objectList(str, i, objs); + if j>=0: + subj = objs[0] + if len(objs)>1: + for obj in objs: + self.makeStatement((self._context, + DAML_sameAs, subj, obj)) + j = self.skipSpace(str, j) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF when objectList expected after [ = ") + if str[j:j+1] == ";": + j=j+1 + else: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "objectList expected after [= ") + + if subj is None: + subj=self.blankNode(uri= bnodeID) + + i = self.property_list(str, j, subj) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "property_list expected") + + j = 
self.skipSpace(str, i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF when ']' expected after [ <propertyList>") + if str[j:j+1] != "]": + raise BadSyntax(self._thisDoc, + self.lines, str, j, "']' expected") + res.append(subj) + return j+1 + + if ch == "{": + ch2 = str[i+1:i+2] + if ch2 == '$': + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "needed '$}', found end.") + if str[i:i+2] == '$}': + j = i+2 + break + + if not first_run: + if str[i:i+1] == ',': + i+=1 + else: + raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected: ','") + else: first_run = False + + item = [] + j = self.item(str,i, item) #@@@@@ should be path, was object + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + j=i+1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = self._reason2 + self._reason2 = becauseSubexpression + if subj is None: subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "needed '}', found end.") + + if str[i:i+1] == "}": + j = i+1 + break + + j = self.directiveOrStatement(str,i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # 
No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = str[i+1:i+2] + if ch2 == '$': + thing_type = self._store.newSet + i += 1 + j=i+1 + + List = [] + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "needed ')', found end.") + if str[i:i+1] == ')': + j = i+1 + break + + item = [] + j = self.item(str,i, item) #@@@@@ should be path, was object + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok('this', str, i) # This context + if j>=0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.") + res.append(self._context) + return j + + #booleans + j = self.tok('true', str, i) + if j>=0: + res.append(True) + return j + j = self.tok('false', str, i) + if j>=0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. 
+ j = self.uri_ref2(str, i, res) + if j >= 0: + return j + + return -1 + + def property_list(self, str, i, subj): + """Parse property list + Leaves the terminating punctuation in the buffer + """ + while 1: + j = self.skipSpace(str, i) + if j<0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF found when expected verb in property list") + return j #eof + + if str[j:j+2] ==":-": + i = j + 2 + res = [] + j = self.node(str, i, res, subj) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "bad {} or () or [] node after :- ") + i=j + continue + i=j + v = [] + j = self.verb(str, i, v) + if j<=0: + return i # void but valid + + objs = [] + i = self.objectList(str, j, objs) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "objectList expected") + for obj in objs: + dir, sym = v[0] + if dir == '->': + self.makeStatement((self._context, sym, subj, obj)) + else: + self.makeStatement((self._context, sym, obj, subj)) + + j = self.skipSpace(str, i) + if j<0: + raise BadSyntax(self._thisDoc, self.lines, str, j, + "EOF found in list of objects") + return j #eof + if str[i:i+1] != ";": + return i + i = i+1 # skip semicolon and continue + + def commaSeparatedList(self, str, j, res, what): + """return value: -1 bad syntax; >1 new position in str + res has things found appended + """ + i = self.skipSpace(str, j) + if i<0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF found expecting comma sep list") + return i + if str[i] == ".": return j # empty list is OK + i = what(str, i, res) + if i<0: return -1 + + while 1: + j = self.skipSpace(str, i) + if j<0: return j # eof + ch = str[j:j+1] + if ch != ",": + if ch != ".": + return -1 + return j # Found but not swallowed "." 
def checkDot(self, str, i):
    """Consume the statement terminator at position *i*.

    Skips leading whitespace, then:
      * '.'        -- swallowed; returns the index just past it.
      * '}' or ']' -- NOT swallowed (the enclosing formula / propertyList
                      parser is responsible for it); returns its index.
      * EOF        -- returns the negative result from skipSpace.
    Anything else is a syntax error.
    """
    j = self.skipSpace(str, i)
    if j < 0:
        return j  # eof
    ch = str[j:j + 1]
    if ch == ".":
        return j + 1  # skip the dot
    if ch == "}":
        return j  # don't skip it
    if ch == "]":
        return j
    raise BadSyntax(self._thisDoc, self.lines,
                    str, j, "expected '.' or '}' or ']' at end of statement")
    # NOTE: the original ended with an unreachable 'return i' after this
    # raise; removed as dead code.
+ res.append(v[0]) + return j + return -1 + + elif str[i]=="<": + i = i + 1 + st = i + while i < len(str): + if str[i] == ">": + uref = str[st:i] # the join should dealt with "": + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ":" in uref, \ + "With no base URI, cannot deal with relative URIs" + if str[i-1:i]=="#" and not uref[-1:]=="#": + uref = uref + "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + if symb in self._variables: + res.append(self._variables[symb]) + else: + res.append(symb) + return i+1 + i = i + 1 + raise BadSyntax(self._thisDoc, self.lines, str, j, + "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(str,i,v) + if j<0: return -1 #Forget varibles as a class, only in context. + if v[0] in self.keywords: + raise BadSyntax(self._thisDoc, self.lines, str, i, + 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""]+v[0])) + return j + else: + return -1 + + def skipSpace(self, str, i): + """Skip white space, newlines and comments. 
def bareWord(self, str, i, res):
    """Parse a bare word (keyword-style name):  abc -> :abc

    Appends the word's text to *res* and returns the index just past
    it, or -1 when no bare word starts at *i*.
    """
    j = self.skipSpace(str, i)
    if j < 0:
        return -1

    first = str[j]
    # A bare word may not begin with a digit, '-', or a name-breaking char.
    if first in "0123456789-" or first in _notNameChars:
        return -1

    end = j
    while end < len(str) and str[end] not in _notNameChars:
        end = end + 1
    res.append(str[j:end])
    return end
len(str): + c = str[i] + if c not in _notNameChars: + ln = ln + c + i = i + 1 + else: break + else: # First character is non-alpha + ln = '' # Was: None - TBL (why? useful?) + + if i<len(str) and str[i] == ':': + pfx = ln + i = i + 1 + ln = '' + while i < len(str): + c = str[i] + if c not in _notNameChars: + ln = ln + c + i = i + 1 + else: break + + res.append((pfx, ln)) + return i + + else: # delimiter was not ":" + if ln and self.keywordsSet and ln not in self.keywords: + res.append(('', ln)) + return i + return -1 + + def object(self, str, i, res): + j = self.subject(str, i, res) + if j>= 0: + return j + else: + j = self.skipSpace(str, i) + if j<0: return -1 + else: i=j + + if str[i]=='"': + if str[i:i+3] == '"""': delim = '"""' + else: delim = '"' + i = i + len(delim) + + j, s = self.strconst(str, i, delim) + + res.append(self._store.newLiteral(s)) + progress("New string const ", s, j) + return j + else: + return -1 + + def nodeOrLiteral(self, str, i, res): + j = self.node(str, i, res) + startline = self.lines # Remember where for error messages + if j>= 0: + return j + else: + j = self.skipSpace(str, i) + if j<0: return -1 + else: i=j + + ch = str[i] + if ch in "-+0987654321": + m = number_syntax.match(str, i) + if m == None: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "Bad number syntax") + j = m.end() + if m.group('exponent') != None: # includes decimal exponent + res.append(float(str[i:j])) +# res.append(self._store.newLiteral(str[i:j], +# self._store.newSymbol(FLOAT_DATATYPE))) + elif m.group('decimal') != None: + res.append(Decimal(str[i:j])) + else: + res.append(long(str[i:j])) +# res.append(self._store.newLiteral(str[i:j], +# self._store.newSymbol(INTEGER_DATATYPE))) + return j + + if str[i]=='"': + if str[i:i+3] == '"""': delim = '"""' + else: delim = '"' + i = i + len(delim) + + dt = None + j, s = self.strconst(str, i, delim) + lang = None + if str[j:j+1] == "@": # Language? 
+ m = langcode.match(str, j+1) + if m == None: + raise BadSyntax(self._thisDoc, startline, str, i, + "Bad language code syntax on string literal, after @") + i = m.end() + lang = str[j+1:i] + j = i + if str[j:j+2] == "^^": + res2 = [] + j = self.uri_ref2(str, j+2, res2) # Read datatype URI + dt = res2[0] +# if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": + if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": + try: + dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">' + + s + + '</rdf:envelope>').firstChild + except: + raise ValueError('s="%s"' % s) + res.append(self._store.newXMLLiteral(dom)) + return j + res.append(self._store.newLiteral(s, dt, lang)) + return j + else: + return -1 + + def uriOf(self, sym): + if isinstance(sym, types.TupleType): + return sym[1] # old system for --pipe + # return sym.uriref() # cwm api + return sym + + + def strconst(self, str, i, delim): + """parse an N3 string constant delimited by delim. + return index, val + """ + + + j = i + ustr = u"" # Empty unicode string + startline = self.lines # Remember where for error messages + while j<len(str): + if str[j] == '"': + if delim == '"': # done when delim is " + i = j + 1 + return i, ustr + if delim == '"""': # done when delim is """ and ... + if str[j:j+5] == '"""""': # ... we have "" before + i = j + 5 + ustr = ustr + '""' + return i, ustr + if str[j:j+4] == '""""': # ... we have " before + i = j + 4 + ustr = ustr + '"' + return i, ustr + if str[j:j+3] == '"""': # ... current " is part of delim + i = j + 3 + return i, ustr + + # we are inside of the string and current char is " + j = j + 1 + ustr = ustr + '"' + continue + + m = interesting.search(str, j) # was str[j:]. + # Note for pos param to work, MUST be compiled ... re bug? 
+ assert m , "Quote expected in string at ^ in %s^%s" %( + str[j-20:j], str[j:j+20]) # we at least have to find a quote + + i = m.start() + try: + ustr = ustr + str[j:i] + except UnicodeError: + err = "" + for c in str[j:i]: + err = err + (" %02x" % ord(c)) + streason = sys.exc_info()[1].__str__() + raise BadSyntax(self._thisDoc, startline, str, j, + "Unicode error appending characters %s to string, because\n\t%s" + % (err, streason)) + +# print "@@@ i = ",i, " j=",j, "m.end=", m.end() + + ch = str[i] + if ch == '"': + j = i + continue + elif ch == "\r": # Strip carriage returns + j = i+1 + continue + elif ch == "\n": + if delim == '"': + raise BadSyntax(self._thisDoc, startline, str, i, + "newline found in string literal") + self.lines = self.lines + 1 + ustr = ustr + ch + j = i + 1 + self.startOfLine = j + + elif ch == "\\": + j = i + 1 + ch = str[j:j+1] # Will be empty if string ends + if not ch: + raise BadSyntax(self._thisDoc, startline, str, i, + "unterminated string literal (2)") + k = 'abfrtvn\\"'.find(ch) + if k >= 0: + uch = '\a\b\f\r\t\v\n\\"'[k] + ustr = ustr + uch + j = j + 1 + elif ch == "u": + j, ch = self.uEscape(str, j+1, startline) + ustr = ustr + ch + elif ch == "U": + j, ch = self.UEscape(str, j+1, startline) + ustr = ustr + ch + else: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "bad escape") + + raise BadSyntax(self._thisDoc, self.lines, str, i, + "unterminated string literal") + + + def uEscape(self, str, i, startline): + j = i + count = 0 + value = 0 + while count < 4: # Get 4 more characters + ch = str[j:j+1].lower() + # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 + j = j + 1 + if ch == "": + raise BadSyntax(self._thisDoc, startline, str, i, + "unterminated string literal(3)") + k = "0123456789abcdef".find(ch) + if k < 0: + raise BadSyntax(self._thisDoc, startline, str, i, + "bad string literal hex escape") + value = value * 16 + k + count = count + 1 + uch = unichr(value) + return j, uch + + def UEscape(self, str, i, 
startline): + stringType = type('') + j = i + count = 0 + value = '\\U' + while count < 8: # Get 8 more characters + ch = str[j:j+1].lower() + # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 + j = j + 1 + if ch == "": + raise BadSyntax(self._thisDoc, startline, str, i, + "unterminated string literal(3)") + k = "0123456789abcdef".find(ch) + if k < 0: + raise BadSyntax(self._thisDoc, startline, str, i, + "bad string literal hex escape") + value = value + ch + count = count + 1 + + uch = stringType(value).decode('unicode-escape') + return j, uch + +wide_build = True +try: + unichr(0x10000) +except ValueError: + wide_build = False + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, str, i, why): + self._str = str.encode('utf-8') # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + str = self._str + i = self._i + st = 0 + if i>60: + pre="..." + st = i - 60 + else: pre="" + if len(str)-i > 60: post="..." 
def stripCR(str):
    """Return *str* with every carriage-return character removed.

    Used to normalise line endings before parsing.  str.replace runs in
    a single C-level pass instead of the original quadratic += loop.
    """
    return str.replace("\r", "")

def dummyWrite(x):
    """No-op sink for output that callers want to discard."""
    pass

################################################################################


def toBool(s):
    """Map an N3/XSD boolean lexical form to a Python bool.

    'true' / 'True' / '1'   -> True
    'false' / 'False' / '0' -> False
    Any other value raises ValueError.
    """
    if s in ('true', 'True', '1'):
        return True
    if s in ('false', 'False', '0'):
        return False
    raise ValueError(s)
str(self.counter)) + else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b')) + return b + + def newLiteral(self, s, dt, lang): + if dt: return Literal(s, datatype=dt) + else: return Literal(s, lang=lang) + + def newList(self, n, f): + if not n: + return self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil' + ) + + a = self.newBlankNode(f) + first = self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first' + ) + rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest') + self.makeStatement((f, first, a, n[0])) + self.makeStatement((f, rest, a, self.newList(n[1:], f))) + return a + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args): + return ':'.join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None): + f, p, s, o = quadruple + + if hasattr(p, 'formula'): + raise Exception("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + + + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.add((s, p, o)) + else: + f.quotedgraph.add((s,p,o)) + + + #return str(quadruple) + + def normalise(self, f, n): + if isinstance(n, tuple): + return URIRef(unicode(n[1])) + + # if isinstance(n, list): + # rdflist, f = n + # name = self.newBlankNode() + # if f == self.rootFormula: + # sublist = name + # for i in xrange(0, len(rdflist) - 1): + # print sublist, 'first', rdflist[i] + # rest = self.newBlankNode() + # print sublist, 'rest', rest + # sublist = rest + # print sublist, 'first', rdflist[-1] + # print sublist, 'rest', 'nil' + # return name + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, long): + s = Literal(unicode(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n.normalize()) + if value == '-0': + value = '0' + s = Literal(value, datatype=DECIMAL_DATATYPE ) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE ) + return s + + if f.existentials.has_key(n): + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something): + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula): + self.rootFormula = formula + + def endDoc(self, formula): + pass + + +################################################### +# +# Utilities +# + +Escapes = {'a': '\a', + 'b': '\b', + 'f': '\f', + 'r': '\r', + 't': '\t', + 'v': '\v', + 'n': '\n', + '\\': '\\', + '"': '"'} + +forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]') +forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]') +#" +def stringToN3(str, singleLine=0, flags=""): + res = '' + if (len(str) > 20 and + str[-1] <> '"' and + not singleLine and + (str.find("\n") >=0 + or str.find('"') >=0)): + delim= '"""' + forbidden = 
forbidden1 # (allow tabs too now) + else: + delim = '"' + forbidden = forbidden2 + + i = 0 + + while i < len(str): + m = forbidden.search(str, i) + if not m: + break + + j = m.start() + res = res + str[i:j] + ch = m.group(0) + if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #" + res = res + ch + else: + k = '\a\b\f\r\t\v\n\\"'.find(ch) + if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k] + else: + if 'e' in flags: +# res = res + ('\\u%04x' % ord(ch)) + res = res + ('\\u%04X' % ord(ch)) + # http://www.w3.org/TR/rdf-testcases/#ntriples + else: + res = res + ch + i = j + 1 + + # The following code fixes things for really high range Unicode + newstr = "" + for ch in res + str[i:]: + if ord(ch)>65535: + newstr = newstr + ('\\U%08X' % ord(ch)) + # http://www.w3.org/TR/rdf-testcases/#ntriples + else: + newstr = newstr + ch + # + + return delim + newstr + delim + +def backslashUify(ustr): + """Use URL encoding to return an ASCII string corresponding + to the given unicode""" +# progress("String is "+`ustr`) +# s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 65535: + ch = "\\U%08X" % ord(ch) + elif ord(ch) > 126: + ch = "\\u%04X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return b(s) + +@py3compat.format_doctest_out +def hexify(ustr): + """Use URL encoding to return an ASCII string + corresponding to the given UTF8 string + + >>> hexify("http://example/a b") + %(b)s'http://example/a%%20b' + + """ #" +# progress("String is "+`ustr`) +# s1=ustr.encode('utf-8') + s = "" + for ch in ustr: # .encode('utf-8'): + if ord(ch) > 126 or ord(ch) < 33 : + ch = "%%%02X" % ord(ch) + else: + ch = "%c" % ord(ch) + s = s + ch + return b(s) + +def dummy(): + res = "" + if len(str) > 20 and (str.find("\n") >=0 + or str.find('"') >=0): + delim= '"""' + forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now) + else: + delim = '"' + forbidden = "\\\"\a\b\f\r\v\t\n" + for i in range(len(str)): + ch = str[i] + j = forbidden.find(ch) + if ch == '"' 
class N3Parser(Parser):
    """Feed an N3 InputSource into a formula-aware rdflib graph."""

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        """Parse *source* (an InputSource) into *graph*.

        The store behind *graph* must be both context-aware and
        formula-aware, and only utf-8 (or unspecified) encodings are
        accepted, since N3 is defined to be utf-8.
        """
        # we're currently being handed a Graph, not a ConjunctiveGraph
        assert graph.store.context_aware # is this implied by formula_aware
        assert graph.store.formula_aware

        if encoding not in [None, "utf-8"]:
            raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)

        conj = ConjunctiveGraph(store=graph.store)
        # TODO: CG __init__ should have a default_context arg
        conj.default_context = graph
        # TODO: update N3Processor so that it can use conj as the sink
        conj.namespace_manager = graph.namespace_manager

        sink = RDFSink(conj)
        base_uri = graph.absolutize(
            source.getPublicId() or source.getSystemId() or "")

        sink_parser = SinkParser(sink, baseURI=base_uri)
        sink_parser.loadStream(source.getByteStream())

        for pfx, ns in sink_parser._bindings.items():
            conj.bind(pfx, ns)
b/creactistore/_templates/lib/rdflib_/plugins/parsers/notation3.py~ @@ -0,0 +1,2314 @@ +#!/usr/bin/env python +u""" +notation3.py - Standalone Notation3 Parser +Derived from CWM, the Closed World Machine + +Authors of the original suite: + +* Dan Connolly <@@> +* Tim Berners-Lee <@@> +* Yosi Scharf <@@> +* Joseph M. Reagle Jr. <reagle@w3.org> +* Rich Salz <rsalz@zolera.com> + +http://www.w3.org/2000/10/swap/notation3.py + +Copyright 2000-2007, World Wide Web Consortium. +Copyright 2001, MIT. +Copyright 2001, Zolera Systems Inc. + +License: W3C Software License +http://www.w3.org/Consortium/Legal/copyright-software + +Modified by Sean B. Palmer +Copyright 2007, Sean B. Palmer. \u32E1 + +Modified to work with rdflib by Gunnar Aastrand Grimnes +Copyright 2010, Gunnar A. Grimnes + +""" + +# Python standard libraries +import types +import sys +import os +import string +import re +import time +import StringIO +import codecs + +from binascii import a2b_hex +from decimal import Decimal + +from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id +from rdflib.graph import QuotedGraph, ConjunctiveGraph +from rdflib import py3compat +b = py3compat.b + +__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"] + +from rdflib.parser import Parser + +# Incestuous.. 
def splitFrag(uriref):
    """split a URI reference between the fragment and the rest.

    Punctuation is thrown away.

    e.g.

    >>> splitFrag("abc#def")
    ('abc', 'def')

    >>> splitFrag("abcdef")
    ('abcdef', None)

    """
    before, hash_, frag = uriref.rpartition("#")
    if hash_:
        return before, frag
    return uriref, None

def splitFragP(uriref, punct=0):
    """split a URI reference before the fragment

    Punctuation is kept.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    before, hash_, frag = uriref.rpartition("#")
    if hash_:
        return before, hash_ + frag
    return uriref, ''
+ + >>> join('http://example/x/y/z', '') + 'http://example/x/y/z' + + >>> join('mid:foo@example', '#foo') + 'mid:foo@example#foo' + + We grok IRIs + + >>> len(u'Andr\\xe9') + 5 + + >>> join('http://example.org/', u'#Andr\\xe9') + %(u)s'http://example.org/#Andr\\xe9' + """ + + assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?) + + slashl = there.find('/') + colonl = there.find(':') + + # join(base, 'foo:/') -- absolute + if colonl >= 0 and (slashl < 0 or colonl < slashl): + return there + + bcolonl = here.find(':') + assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute + + path, frag = splitFragP(there) + if not path: return here + frag + + # join('mid:foo@example', '../foo') bzzt + if here[bcolonl+1:bcolonl+2] <> '/': + raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." %(here, there)) + + if here[bcolonl+1:bcolonl+3] == '//': + bpath = here.find('/', bcolonl+3) + else: + bpath = bcolonl+1 + + # join('http://xyz', 'foo') + if bpath < 0: + bpath = len(here) + here = here + '/' + + # join('http://xyz/', '//abc') => 'http://abc' + if there[:2] == '//': + return here[:bcolonl+1] + there + + # join('http://xyz/', '/abc') => 'http://xyz/abc' + if there[:1] == '/': + return here[:bpath] + there + + slashr = here.rfind('/') + + while 1: + if path[:2] == './': + path = path[2:] + if path == '.': + path = '' + elif path[:3] == '../' or path == '..': + path = path[3:] + i = here.rfind('/', bpath, slashr) + if i >= 0: + here = here[:i+1] + slashr = i + else: + break + + return here[:slashr+1] + path + frag + +commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$') + +def refTo(base, uri): + """figure out a relative URI reference from base to uri + + >>> refTo('http://example/x/y/z', 'http://example/x/abc') + '../abc' + + >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s') + 'q/r#s' + + >>> refTo(None, 'http://ex/x/y') + 'http://ex/x/y' + + >>> refTo('http://ex/x/y', 
'http://ex/x/y') + '' + + Note the relationship between refTo and join: + join(x, refTo(x, y)) == y + which points out certain strings which cannot be URIs. e.g. + >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y + 0 + + So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead: + >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y + 1 + + This one checks that it uses a root-realtive one where that is + all they share. Now uses root-relative where no path is shared. + This is a matter of taste but tends to give more resilience IMHO + -- and shorter paths + + Note that base may be None, meaning no base. In some situations, there + just ain't a base. Slife. In these cases, relTo returns the absolute value. + The axiom abs(,rel(b,x))=x still holds. + This saves people having to set the base to "bogus:". + + >>> refTo('http://ex/x/y/z', 'http://ex/r') + '/r' + + """ + +# assert base # don't mask bugs -danc # not a bug. -tim + if not base: return uri + if base == uri: return "" + + # Find how many path segments in common + i=0 + while i<len(uri) and i<len(base): + if uri[i] == base[i]: i = i + 1 + else: break + # print "# relative", base, uri, " same up to ", i + # i point to end of shortest one or first difference + + m = commonHost.match(base[:i]) + if m: + k=uri.find("//") + if k<0: k=-2 # no host + l=uri.find("/", k+2) + if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]: + return uri[l:] + + if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base + + while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash + + if i < 3: return uri # No way. 
+ if base.find("//", i-2)>0 \ + or uri.find("//", i-2)>0: return uri # An unshared "//" + if base.find(":", i)>0: return uri # An unshared ":" + n = base.count("/", i) + if n == 0 and i<len(uri) and uri[i] == '#': + return "./" + uri[i:] + elif n == 0 and i == len(uri): + return "./" + else: + return ("../" * n) + uri[i:] + + +def base(): + """The base URI for this process - the Web equiv of cwd + + Relative or abolute unix-standard filenames parsed relative to + this yeild the URI of the file. + If we had a reliable way of getting a computer name, + we should put it in the hostname just to prevent ambiguity + + """ +# return "file://" + hostname + os.getcwd() + "/" + return "file://" + _fixslash(os.getcwd()) + "/" + + +def _fixslash(str): + """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" + s = str + for i in range(len(s)): + if s[i] == "\\": s = s[:i] + "/" + s[i+1:] + if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present + return s + +URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~") + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + +@py3compat.format_doctest_out +def canonical(str_in): + """Convert equivalent URIs (or parts) to the same string + + There are many differenet levels of URI canonicalization + which are possible. 
See http://www.ietf.org/rfc/rfc3986.txt + Done: + - Converfting unicode IRI to utf-8 + - Escaping all non-ASCII + - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39), + hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4) + - Making all escapes uppercase hexadecimal + + Not done: + - Making URI scheme lowercase + - changing /./ or /foo/../ to / with care not to change host part + + + >>> canonical("foo bar") + %(b)s'foo%%20bar' + + >>> canonical(u'http:') + %(b)s'http:' + + >>> canonical('fran%%c3%%83%%c2%%a7ois') + %(b)s'fran%%C3%%83%%C2%%A7ois' + + >>> canonical('a') + %(b)s'a' + + >>> canonical('%%4e') + %(b)s'N' + + >>> canonical('%%9d') + %(b)s'%%9D' + + >>> canonical('%%2f') + %(b)s'%%2F' + + >>> canonical('%%2F') + %(b)s'%%2F' + + """ + if type(str_in) == type(u''): + s8 = str_in.encode('utf-8') + else: + s8 = str_in + s = b('') + i = 0 + while i < len(s8): + if py3compat.PY3: + n = s8[i]; ch = bytes([n]) + else: + ch = s8[i]; n = ord(ch) + if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8 + s += b("%%%02X" % ord(ch)) + elif ch == b('%') and i+2 < len(s8): + ch2 = a2b_hex(s8[i+1:i+3]) + if ch2 in URI_unreserved: s += ch2 + else: s += b("%%%02X" % ord(ch2)) + i = i+3 + continue + else: + s += ch + i = i +1 + return s + + + + + + +CONTEXT = 0 +PRED = 1 +SUBJ = 2 +OBJ = 3 + +PARTS = PRED, SUBJ, OBJ +ALL4 = CONTEXT, PRED, SUBJ, OBJ + +SYMBOL = 0 +FORMULA = 1 +LITERAL = 2 +LITERAL_DT = 21 +LITERAL_LANG = 22 +ANONYMOUS = 3 +XMLLITERAL = 25 + +Logic_NS = "http://www.w3.org/2000/10/swap/log#" +NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging +forSomeSym = Logic_NS + "forSome" +forAllSym = Logic_NS + "forAll" + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" +RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +OWL_NS = "http://www.w3.org/2002/07/owl#" +DAML_sameAs_URI = OWL_NS+"sameAs" +parsesTo_URI = Logic_NS + "parsesTo" +RDF_spec = 
"http://www.w3.org/TR/REC-rdf-syntax/" + +List_NS = RDF_NS_URI # From 20030808 +_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" + +N3_first = (SYMBOL, List_NS + "first") +N3_rest = (SYMBOL, List_NS + "rest") +N3_li = (SYMBOL, List_NS + "li") +N3_nil = (SYMBOL, List_NS + "nil") +N3_List = (SYMBOL, List_NS + "List") +N3_Empty = (SYMBOL, List_NS + "Empty") + + + +runNamespaceValue = None + +def runNamespace(): + "Return a URI suitable as a namespace for run-local objects" + # @@@ include hostname (privacy?) (hash it?) + global runNamespaceValue + if runNamespaceValue == None: + runNamespaceValue = join(base(), _unique_id()) + '#' + return runNamespaceValue + +nextu = 0 +def uniqueURI(): + "A unique URI" + global nextu + nextu += 1 + return runNamespace() + "u_" + `nextu` + +class URISyntaxError(ValueError): + """A parameter is passed to a routine that requires a URI reference""" + pass + + +tracking = False +chatty_flag = 50 + + +from xml.dom import Node +try: + from xml.ns import XMLNS +except: + class XMLNS: + BASE = "http://www.w3.org/2000/xmlns/" + XML = "http://www.w3.org/XML/1998/namespace" + + +_attrs = lambda E: (E.attributes and E.attributes.values()) or [] +_children = lambda E: E.childNodes or [] +_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML +_inclusive = lambda n: n.unsuppressedPrefixes == None + +# Does a document/PI has lesser/greater document order than the +# first element? 
+_LesserElement, _Element, _GreaterElement = range(3) + +def _sorter(n1,n2): + '''_sorter(n1,n2) -> int + Sorting predicate for non-NS attributes.''' + + i = cmp(n1.namespaceURI, n2.namespaceURI) + if i: return i + return cmp(n1.localName, n2.localName) + + +def _sorter_ns(n1,n2): + '''_sorter_ns((n,v),(n,v)) -> int + "(an empty namespace URI is lexicographically least)."''' + + if n1[0] == 'xmlns': return -1 + if n2[0] == 'xmlns': return 1 + return cmp(n1[0], n2[0]) + +def _utilized(n, node, other_attrs, unsuppressedPrefixes): + '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean + Return true if that nodespace is utilized within the node''' + + if n.startswith('xmlns:'): + n = n[6:] + elif n.startswith('xmlns'): + n = n[5:] + if (n=="" and node.prefix in ["#default", None]) or \ + n == node.prefix or n in unsuppressedPrefixes: + return 1 + for attr in other_attrs: + if n == attr.prefix: return 1 + return 0 + +#_in_subset = lambda subset, node: not subset or node in subset +_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak + +class _implementation: + '''Implementation class for C14N. This accompanies a node during it's + processing and includes the parameters and processing state.''' + + # Handler for each node type; populated during module instantiation. + handlers = {} + + def __init__(self, node, write, **kw): + '''Create and run the implementation.''' + self.write = write + self.subset = kw.get('subset') + self.comments = kw.get('comments', 0) + self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes') + nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE }) + + # Processing state. 
+ self.state = (nsdict, {'xml':''}, {}) #0422 + + if node.nodeType == Node.DOCUMENT_NODE: + self._do_document(node) + elif node.nodeType == Node.ELEMENT_NODE: + self.documentOrder = _Element # At document element + if not _inclusive(self): + self._do_element(node) + else: + inherited = self._inherit_context(node) + self._do_element(node, inherited) + elif node.nodeType == Node.DOCUMENT_TYPE_NODE: + pass + elif node.nodeType == Node.TEXT_NODE: + self._do_text(node) + else: + raise TypeError, str(node) + + + def _inherit_context(self, node): + '''_inherit_context(self, node) -> list + Scan ancestors of attribute and namespace context. Used only + for single element node canonicalization, not for subset + canonicalization.''' + + # Collect the initial list of xml:foo attributes. + xmlattrs = filter(_IN_XML_NS, _attrs(node)) + + # Walk up and get all xml:XXX attributes we inherit. + inherited, parent = [], node.parentNode + while parent and parent.nodeType == Node.ELEMENT_NODE: + for a in filter(_IN_XML_NS, _attrs(parent)): + n = a.localName + if n not in xmlattrs: + xmlattrs.append(n) + inherited.append(a) + parent = parent.parentNode + return inherited + + + def _do_document(self, node): + '''_do_document(self, node) -> None + Process a document node. 
documentOrder holds whether the document + element has been encountered such that PIs/comments can be written + as specified.''' + + self.documentOrder = _LesserElement + for child in node.childNodes: + if child.nodeType == Node.ELEMENT_NODE: + self.documentOrder = _Element # At document element + self._do_element(child) + self.documentOrder = _GreaterElement # After document element + elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE: + self._do_pi(child) + elif child.nodeType == Node.COMMENT_NODE: + self._do_comment(child) + elif child.nodeType == Node.DOCUMENT_TYPE_NODE: + pass + else: + raise TypeError, str(child) + handlers[Node.DOCUMENT_NODE] = _do_document + + + def _do_text(self, node): + '''_do_text(self, node) -> None + Process a text or CDATA node. Render various special characters + as their C14N entity representations.''' + if not _in_subset(self.subset, node): return + s = node.data.replace("&", "&") + s = s.replace("<", "<") + s = s.replace(">", ">") + s = s.replace("\015", "
") + if s: self.write(s) + handlers[Node.TEXT_NODE] = _do_text + handlers[Node.CDATA_SECTION_NODE] = _do_text + + + def _do_pi(self, node): + '''_do_pi(self, node) -> None + Process a PI node. Render a leading or trailing #xA if the + document order of the PI is greater or lesser (respectively) + than the document element. + ''' + if not _in_subset(self.subset, node): return + W = self.write + if self.documentOrder == _GreaterElement: W('\n') + W('<?') + W(node.nodeName) + s = node.data + if s: + W(' ') + W(s) + W('?>') + if self.documentOrder == _LesserElement: W('\n') + handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi + + + def _do_comment(self, node): + '''_do_comment(self, node) -> None + Process a comment node. Render a leading or trailing #xA if the + document order of the comment is greater or lesser (respectively) + than the document element. + ''' + if not _in_subset(self.subset, node): return + if self.comments: + W = self.write + if self.documentOrder == _GreaterElement: W('\n') + W('<!--') + W(node.data) + W('-->') + if self.documentOrder == _LesserElement: W('\n') + handlers[Node.COMMENT_NODE] = _do_comment + + + def _do_attr(self, n, value): + ''''_do_attr(self, node) -> None + Process an attribute.''' + + W = self.write + W(' ') + W(n) + W('="') + s = value.replace(value, "&", "&") + s = s.replace("<", "<") + s = s.replace('"', '"') + s = s.replace('\011', '	') + s = s.replace('\012', '
') + s = s.replace('\015', '
') + W(s) + W('"') + + + def _do_element(self, node, initial_other_attrs = []): + '''_do_element(self, node, initial_other_attrs = []) -> None + Process an element (and its children).''' + + # Get state (from the stack) make local copies. + # ns_parent -- NS declarations in parent + # ns_rendered -- NS nodes rendered by ancestors + # ns_local -- NS declarations relevant to this element + # xml_attrs -- Attributes in XML namespace from parent + # xml_attrs_local -- Local attributes in XML namespace. + ns_parent, ns_rendered, xml_attrs = \ + self.state[0], self.state[1].copy(), self.state[2].copy() #0422 + ns_local = ns_parent.copy() + xml_attrs_local = {} + + # progress("_do_element node.nodeName=", node.nodeName) + # progress("_do_element node.namespaceURI", node.namespaceURI) + # progress("_do_element node.tocml()", node.toxml()) + # Divide attributes into NS, XML, and others. + other_attrs = initial_other_attrs[:] + in_subset = _in_subset(self.subset, node) + for a in _attrs(node): + # progress("\t_do_element a.nodeName=", a.nodeName) + if a.namespaceURI == XMLNS.BASE: + n = a.nodeName + if n == "xmlns:": n = "xmlns" # DOM bug workaround + ns_local[n] = a.nodeValue + elif a.namespaceURI == XMLNS.XML: + if _inclusive(self) or in_subset: + xml_attrs_local[a.nodeName] = a #0426 + else: + other_attrs.append(a) + #add local xml:foo attributes to ancestor's xml:foo attributes + xml_attrs.update(xml_attrs_local) + + # Render the node + W, name = self.write, None + if in_subset: + name = node.nodeName + W('<') + W(name) + + # Create list of NS attributes to render. + ns_to_render = [] + for n,v in ns_local.items(): + + # If default namespace is XMLNS.BASE or empty, + # and if an ancestor was the same + if n == "xmlns" and v in [ XMLNS.BASE, '' ] \ + and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]: + continue + + # "omit namespace node with local name xml, which defines + # the xml prefix, if its string value is + # http://www.w3.org/XML/1998/namespace." 
+ if n in ["xmlns:xml", "xml"] \ + and v in [ 'http://www.w3.org/XML/1998/namespace' ]: + continue + + + # If not previously rendered + # and it's inclusive or utilized + if (n,v) not in ns_rendered.items() \ + and (_inclusive(self) or \ + _utilized(n, node, other_attrs, self.unsuppressedPrefixes)): + ns_to_render.append((n, v)) + + # Sort and render the ns, marking what was rendered. + ns_to_render.sort(_sorter_ns) + for n,v in ns_to_render: + self._do_attr(n, v) + ns_rendered[n]=v #0417 + + # If exclusive or the parent is in the subset, add the local xml attributes + # Else, add all local and ancestor xml attributes + # Sort and render the attributes. + if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426 + other_attrs.extend(xml_attrs_local.values()) + else: + other_attrs.extend(xml_attrs.values()) + other_attrs.sort(_sorter) + for a in other_attrs: + self._do_attr(a.nodeName, a.value) + W('>') + + # Push state, recurse, pop state. + state, self.state = self.state, (ns_local, ns_rendered, xml_attrs) + for c in _children(node): + _implementation.handlers[c.nodeType](self, c) + self.state = state + + if name: W('</%s>' % name) + handlers[Node.ELEMENT_NODE] = _do_element + + +def Canonicalize(node, output=None, **kw): + '''Canonicalize(node, output=None, **kw) -> UTF-8 + + Canonicalize a DOM document/element node and all descendents. + Return the text; if output is specified then output.write will + be called to output the text and None will be returned + Keyword parameters: + nsdict -- a dictionary of prefix:uri namespace entries + assumed to exist in the surrounding context + comments -- keep comments if non-zero (default is 0) + subset -- Canonical XML subsetting resulting from XPath (default is []) + unsuppressedPrefixes -- do exclusive C14N, and this specifies the + prefixes that should be inherited. 
+ ''' + if output: + apply(_implementation, (node, output.write), kw) + else: + s = StringIO.StringIO() + apply(_implementation, (node, s.write), kw) + return s.getvalue() + +# end of xmlC14n.py + +# from why import BecauseOfData, becauseSubexpression +def BecauseOfData(*args, **kargs): + # print args, kargs + pass +def becauseSubexpression(*args, **kargs): + # print args, kargs + pass + +N3_forSome_URI = forSomeSym +N3_forAll_URI = forAllSym + +# Magic resources we know about + + + +ADDED_HASH = "#" # Stop where we use this in case we want to remove it! +# This is the hash on namespace URIs + +RDF_type = ( SYMBOL , RDF_type_URI ) +DAML_sameAs = ( SYMBOL, DAML_sameAs_URI ) + +LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" + +BOOLEAN_DATATYPE = _XSD_PFX + "boolean" +DECIMAL_DATATYPE = _XSD_PFX + "decimal" +DOUBLE_DATATYPE = _XSD_PFX + "double" +FLOAT_DATATYPE = _XSD_PFX + "float" +INTEGER_DATATYPE = _XSD_PFX + "integer" + +option_noregen = 0 # If set, do not regenerate genids on output + +# @@ I18n - the notname chars need extending for well known unicode non-text +# characters. The XML spec switched to assuming unknown things were name +# characaters. +# _namechars = string.lowercase + string.uppercase + string.digits + '_-' +_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ +_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ +_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + + +N3CommentCharacter = "#" # For unix script #! compatabilty + +########################################## Parse string to sink +# +# Regular expressions: +eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment +eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. 
w/comment +ws = re.compile(r'[ \t]*') # Whitespace not including NL +signed_integer = re.compile(r'[-+]?[0-9]+') # integer +number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?') +digitstring = re.compile(r'[0-9]+') # Unsigned integer +interesting = re.compile(r'[\\\r\n\"]') +langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?') +#" + + + +class SinkParser: + def __init__(self, store, openFormula=None, thisDoc="", baseURI=None, + genPrefix = "", flags="", + why=None): + """ note: namespace names should *not* end in #; + the # will get added during qname processing """ + + self._bindings = {} + self._flags = flags + if thisDoc != "": + assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc + self._bindings[""] = thisDoc + "#" # default + + self._store = store + if genPrefix: store.setGenPrefix(genPrefix) # pass it on + + self._thisDoc = thisDoc + self.lines = 0 # for error handling + self.startOfLine = 0 # For calculating character number + self._genPrefix = genPrefix + self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ] + self.keywordsSet = 0 # Then only can others be considerd qnames + self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term + self._variables = {} + self._parentVariables = {} + self._reason = why # Why the parser was asked to parse this + + self._reason2 = None # Why these triples + # was: diag.tracking + if tracking: self._reason2 = BecauseOfData( + store.newSymbol(thisDoc), because=self._reason) + + if baseURI: self._baseURI = baseURI + else: + if thisDoc: + self._baseURI = thisDoc + else: + self._baseURI = None + + assert not self._baseURI or ':' in self._baseURI + + if not self._genPrefix: + if self._thisDoc: self._genPrefix = self._thisDoc + "#_g" + else: self._genPrefix = uniqueURI() + + if openFormula ==None: + if self._thisDoc: + self._formula = store.newFormula(thisDoc + "#_formula") + else: + self._formula = store.newFormula() + else: + 
self._formula = openFormula + + + self._context = self._formula + self._parentContext = None + + + def here(self, i): + """String generated from position in file + + This is for repeatability when refering people to bnodes in a document. + This has diagnostic uses less formally, as it should point one to which + bnode the arbitrary identifier actually is. It gives the + line and character number of the '[' charcacter or path character + which introduced the blank node. The first blank node is boringly _L1C1. + It used to be used only for tracking, but for tests in general + it makes the canonical ordering of bnodes repeatable.""" + + return "%s_L%iC%i" % (self._genPrefix , self.lines, + i - self.startOfLine + 1) + + def formula(self): + return self._formula + + def loadStream(self, stream): + return self.loadBuf(stream.read()) # Not ideal + + def loadBuf(self, buf): + """Parses a buffer and returns its top level formula""" + self.startDoc() + + self.feed(buf) + return self.endDoc() # self._formula + + + def feed(self, octets): + """Feed an octet stream tothe parser + + if BadSyntax is raised, the string + passed in the exception object is the + remainder after any statements have been parsed. 
+ So if there is more data to feed to the + parser, it should be straightforward to recover.""" + + if not isinstance(octets, unicode): + s = octets.decode('utf-8') + # NB already decoded, so \ufeff + if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'): + s = s[1:] + else: + s=octets + + i = 0 + while i >= 0: + j = self.skipSpace(s, i) + if j<0: return + + i = self.directiveOrStatement(s,j) + if i<0: + print "# next char: ", `s[j]` + raise BadSyntax(self._thisDoc, self.lines, s, j, + "expected directive or statement") + + def directiveOrStatement(self, str,h): + + i = self.skipSpace(str, h) + if i<0: return i # EOF + + j = self.directive(str, i) + if j>=0: return self.checkDot(str,j) + + j = self.statement(str, i) + if j>=0: return self.checkDot(str,j) + + return j + + + #@@I18N + global _notNameChars + #_namechars = string.lowercase + string.uppercase + string.digits + '_-' + + def tok(self, tok, str, i): + """Check for keyword. Space must have been stripped on entry and + we must not be at end of file.""" + + assert tok[0] not in _notNameChars # not for punctuation + if str[i:i+1] == "@": + i = i+1 + else: + if tok not in self.keywords: + return -1 # No, this has neither keywords declaration nor "@" + + if (str[i:i+len(tok)] == tok + and (str[i+len(tok)] in _notQNameChars )): + i = i + len(tok) + return i + else: + return -1 + + def directive(self, str, i): + j = self.skipSpace(str, i) + if j<0: return j # eof + res = [] + + j = self.tok('bind', str, i) # implied "#". Obsolete. 
+ if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "keyword bind is obsolete: use @prefix") + + j = self.tok('keywords', str, i) + if j>0: + i = self.commaSeparatedList(str, j, res, self.bareWord) + if i < 0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "'@keywords' needs comma separated list of words") + self.setKeywords(res[:]) + # was: diag.chatty_flag + if chatty_flag > 80: progress("Keywords ", self.keywords) + return i + + + j = self.tok('forAll', str, i) + if j > 0: + i = self.commaSeparatedList(str, j, res, self.uri_ref2) + if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "Bad variable list after @forAll") + for x in res: + #self._context.declareUniversal(x) + if x not in self._variables or x in self._parentVariables: + self._variables[x] = self._context.newUniversal(x) + return i + + j = self.tok('forSome', str, i) + if j > 0: + i = self. commaSeparatedList(str, j, res, self.uri_ref2) + if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "Bad variable list after @forSome") + for x in res: + self._context.declareExistential(x) + return i + + + j=self.tok('prefix', str, i) # no implied "#" + if j>=0: + t = [] + i = self.qname(str, j, t) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected qname after @prefix") + j = self.uri_ref2(str, i, t) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected <uriref> after @prefix _qname_") + ns = self.uriOf(t[1]) + + if self._baseURI: + ns = join(self._baseURI, ns) + elif ":" not in ns: + raise BadSyntax(self._thisDoc, self.lines, str, j, + "With no base URI, cannot use relative URI in @prefix <"+ns+">") + assert ':' in ns # must be absolute + self._bindings[t[0][0]] = ns + self.bind(t[0][0], hexify(ns)) + return j + + j=self.tok('base', str, i) # Added 2007/7/7 + if j >= 0: + t = [] + i = self.uri_ref2(str, j, t) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected <uri> after @base ") + ns = self.uriOf(t[0]) + + if 
self._baseURI: + ns = join(self._baseURI, ns) + else: + raise BadSyntax(self._thisDoc, self.lines, str, j, + "With no previous base URI, cannot use relative URI in @base <"+ns+">") + assert ':' in ns # must be absolute + self._baseURI = ns + return i + + return -1 # Not a directive, could be something else. + + def bind(self, qn, uri): + assert isinstance(uri, + types.StringType), "Any unicode must be %x-encoded already" + if qn == "": + self._store.setDefaultNamespace(uri) + else: + self._store.bind(qn, uri) + + def setKeywords(self, k): + "Takes a list of strings" + if k == None: + self.keywordsSet = 0 + else: + self.keywords = k + self.keywordsSet = 1 + + + def startDoc(self): + # was: self._store.startDoc() + self._store.startDoc(self._formula) + + def endDoc(self): + """Signal end of document and stop parsing. returns formula""" + self._store.endDoc(self._formula) # don't canonicalize yet + return self._formula + + def makeStatement(self, quadruple): + #$$$$$$$$$$$$$$$$$$$$$ +# print "# Parser output: ", `quadruple` + self._store.makeStatement(quadruple, why=self._reason2) + + + + def statement(self, str, i): + r = [] + + i = self.object(str, i, r) # Allow literal for subject - extends RDF + if i<0: return i + + j = self.property_list(str, i, r[0]) + + if j<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected propertylist") + return j + + def subject(self, str, i, res): + return self.item(str, i, res) + + def verb(self, str, i, res): + """ has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_""" + + j = self.skipSpace(str, i) + if j<0:return j # eof + + r = [] + + j = self.tok('has', str, i) + if j>=0: + i = self.prop(str, j, r) + if i < 0: raise BadSyntax(self._thisDoc, self.lines, + str, j, "expected property after 'has'") + res.append(('->', r[0])) + return i + + j = self.tok('is', str, i) + if j>=0: + i = self.prop(str, j, r) + if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "expected <property> after 
'is'") + j = self.skipSpace(str, i) + if j<0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "End of file found, expected property after 'is'") + return j # eof + i=j + j = self.tok('of', str, i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected 'of' after 'is' <prop>") + res.append(('<-', r[0])) + return j + + j = self.tok('a', str, i) + if j>=0: + res.append(('->', RDF_type)) + return j + + + if str[i:i+2] == "<=": + res.append(('<-', self._store.newSymbol(Logic_NS+"implies"))) + return i+2 + + if str[i:i+1] == "=": + if str[i+1:i+2] == ">": + res.append(('->', self._store.newSymbol(Logic_NS+"implies"))) + return i+2 + res.append(('->', DAML_sameAs)) + return i+1 + + if str[i:i+2] == ":=": + # patch file relates two formulae, uses this @@ really? + res.append(('->', Logic_NS+"becomes")) + return i+2 + + j = self.prop(str, i, r) + if j >= 0: + res.append(('->', r[0])) + return j + + if str[i:i+2] == ">-" or str[i:i+2] == "<-": + raise BadSyntax(self._thisDoc, self.lines, str, j, + ">- ... -> syntax is obsolete.") + + return -1 + + def prop(self, str, i, res): + return self.item(str, i, res) + + def item(self, str, i, res): + return self.path(str, i, res) + + def blankNode(self, uri=None): + if "B" not in self._flags: + return self._context.newBlankNode(uri, why=self._reason2) + x = self._context.newSymbol(uri) + self._context.declareExistential(x) + return x + + def path(self, str, i, res): + """Parse the path production. + """ + j = self.nodeOrLiteral(str, i, res) + if j<0: return j # nope + + while str[j:j+1] in "!^.": # no spaces, must follow exactly (?) + ch = str[j:j+1] # @@ Allow "." followed IMMEDIATELY by a node. 
+ if ch == ".": + ahead = str[j+1:j+2] + if not ahead or (ahead in _notNameChars + and ahead not in ":?<[{("): break + subj = res.pop() + obj = self.blankNode(uri=self.here(j)) + j = self.node(str, j+1, res) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "EOF found in middle of path syntax") + pred = res.pop() + if ch == "^": # Reverse traverse + self.makeStatement((self._context, pred, obj, subj)) + else: + self.makeStatement((self._context, pred, subj, obj)) + res.append(obj) + return j + + def anonymousNode(self, ln): + """Remember or generate a term for one of these _: anonymous nodes""" + term = self._anonymousNodes.get(ln, None) + if term != None: return term + term = self._store.newBlankNode(self._context, why=self._reason2) + self._anonymousNodes[ln] = term + return term + + def node(self, str, i, res, subjectAlready=None): + """Parse the <node> production. + Space is now skipped once at the beginning + instead of in multipe calls to self.skipSpace(). + """ + subj = subjectAlready + + j = self.skipSpace(str,i) + if j<0: return j #eof + i=j + ch = str[i:i+1] # Quick 1-character checks first: + + if ch == "[": + bnodeID = self.here(i) + j=self.skipSpace(str,i+1) + if j<0: raise BadSyntax(self._thisDoc, + self.lines, str, i, "EOF after '['") + if str[j:j+1] == "=": # Hack for "is" binding name to anon node + i = j+1 + objs = [] + j = self.objectList(str, i, objs); + if j>=0: + subj = objs[0] + if len(objs)>1: + for obj in objs: + self.makeStatement((self._context, + DAML_sameAs, subj, obj)) + j = self.skipSpace(str, j) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF when objectList expected after [ = ") + if str[j:j+1] == ";": + j=j+1 + else: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "objectList expected after [= ") + + if subj is None: + subj=self.blankNode(uri= bnodeID) + + i = self.property_list(str, j, subj) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, + "property_list expected") + + j = 
self.skipSpace(str, i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "EOF when ']' expected after [ <propertyList>") + if str[j:j+1] != "]": + raise BadSyntax(self._thisDoc, + self.lines, str, j, "']' expected") + res.append(subj) + return j+1 + + if ch == "{": + ch2 = str[i+1:i+2] + if ch2 == '$': + i += 1 + j = i + 1 + List = [] + first_run = True + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "needed '$}', found end.") + if str[i:i+2] == '$}': + j = i+2 + break + + if not first_run: + if str[i:i+1] == ',': + i+=1 + else: + raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected: ','") + else: first_run = False + + item = [] + j = self.item(str,i, item) #@@@@@ should be path, was object + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected item in set or '$}'") + List.append(self._store.intern(item[0])) + res.append(self._store.newSet(List, self._context)) + return j + else: + j=i+1 + oldParentContext = self._parentContext + self._parentContext = self._context + parentAnonymousNodes = self._anonymousNodes + grandParentVariables = self._parentVariables + self._parentVariables = self._variables + self._anonymousNodes = {} + self._variables = self._variables.copy() + reason2 = self._reason2 + self._reason2 = becauseSubexpression + if subj is None: subj = self._store.newFormula() + self._context = subj + + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "needed '}', found end.") + + if str[i:i+1] == "}": + j = i+1 + break + + j = self.directiveOrStatement(str,i) + if j<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "expected statement or '}'") + + self._anonymousNodes = parentAnonymousNodes + self._variables = self._parentVariables + self._parentVariables = grandParentVariables + self._context = self._parentContext + self._reason2 = reason2 + self._parentContext = oldParentContext + res.append(subj.close()) # 
No use until closed + return j + + if ch == "(": + thing_type = self._store.newList + ch2 = str[i+1:i+2] + if ch2 == '$': + thing_type = self._store.newSet + i += 1 + j=i+1 + + List = [] + while 1: + i = self.skipSpace(str, j) + if i<0: raise BadSyntax(self._thisDoc, self.lines, + str, i, "needed ')', found end.") + if str[i:i+1] == ')': + j = i+1 + break + + item = [] + j = self.item(str,i, item) #@@@@@ should be path, was object + if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, + "expected item in list or ')'") + List.append(self._store.intern(item[0])) + res.append(thing_type(List, self._context)) + return j + + j = self.tok('this', str, i) # This context + if j>=0: + raise BadSyntax(self._thisDoc, self.lines, str, i, + "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.") + res.append(self._context) + return j + + #booleans + j = self.tok('true', str, i) + if j>=0: + res.append(True) + return j + j = self.tok('false', str, i) + if j>=0: + res.append(False) + return j + + if subj is None: # If this can be a named node, then check for a name. 
def property_list(self, str, i, subj):
    """Parse an N3 property list for *subj* starting at offset *i* of *str*.

    Handles the ``:-`` node-annotation form and repeated
    ``verb objectList`` pairs separated by ``;``.  Leaves the
    terminating punctuation (e.g. ``.`` or ``]``) in the buffer.
    Returns the new parse position; raises BadSyntax on malformed input.
    """
    while 1:
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                "EOF found when expected verb in property list")
            return j #eof  -- unreachable after the raise above
        if str[j:j+2] ==":-":
            # ":-" attaches a {} / () / [] node description to subj
            i = j + 2
            res = []
            j = self.node(str, i, res, subj)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                "bad {} or () or [] node after :- ")
            i=j
            continue
        i=j
        v = []
        j = self.verb(str, i, v)
        if j<=0:
            return i # void but valid -- empty property list
        objs = []
        i = self.objectList(str, j, objs)
        if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
            "objectList expected")
        for obj in objs:
            # verb() yields (direction, symbol); '->' is a forward
            # predicate, otherwise the statement is inverted ("is ... of")
            dir, sym = v[0]
            if dir == '->':
                self.makeStatement((self._context, sym, subj, obj))
            else:
                self.makeStatement((self._context, sym, obj, subj))
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                "EOF found in list of objects")
            return j #eof  -- unreachable after the raise above
        if str[i:i+1] != ";":
            return i
        i = i+1 # skip semicolon and continue
def objectList(self, str, i, res):
    """Parse a comma-separated list of objects, appending each to *res*.

    Returns the position after the list (the character that is not a
    ','), -1 on bad syntax, or raises BadSyntax at EOF.
    """
    i = self.object(str, i, res)
    if i<0: return -1
    while 1:
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                "EOF found after object")
            return j #eof  -- unreachable after the raise above
        if str[j:j+1] != ",":
            return j # Found something else!  Caller handles terminator.
        i = self.object(str, j+1, res)
        if i<0: return i
+ res.append(v[0]) + return j + return -1 + + elif str[i]=="<": + i = i + 1 + st = i + while i < len(str): + if str[i] == ">": + uref = str[st:i] # the join should dealt with "": + if self._baseURI: + uref = join(self._baseURI, uref) # was: uripath.join + else: + assert ":" in uref, \ + "With no base URI, cannot deal with relative URIs" + if str[i-1:i]=="#" and not uref[-1:]=="#": + uref = uref + "#" # She meant it! Weirdness in urlparse? + symb = self._store.newSymbol(uref) + if symb in self._variables: + res.append(self._variables[symb]) + else: + res.append(symb) + return i+1 + i = i + 1 + raise BadSyntax(self._thisDoc, self.lines, str, j, + "unterminated URI reference") + + elif self.keywordsSet: + v = [] + j = self.bareWord(str,i,v) + if j<0: return -1 #Forget varibles as a class, only in context. + if v[0] in self.keywords: + raise BadSyntax(self._thisDoc, self.lines, str, i, + 'Keyword "%s" not allowed here.' % v[0]) + res.append(self._store.newSymbol(self._bindings[""]+v[0])) + return j + else: + return -1 + + def skipSpace(self, str, i): + """Skip white space, newlines and comments. 
def bareWord(self, str, i, res):
    """Parse a bare word token:  abc -> :abc

    Appends the word to *res* and returns the position after it,
    or -1 if no bare word starts here.
    """
    start = self.skipSpace(str, i)
    if start < 0:
        return -1

    first = str[start]
    # A bare word may not begin with a digit, '-', or any name delimiter.
    if first in "0123456789-" or first in _notNameChars:
        return -1

    end = start
    while end < len(str) and str[end] not in _notNameChars:
        end = end + 1

    res.append(str[start:end])
    return end
def object(self, str, i, res):
    """Parse one object: any subject-form node, or a plain string literal.

    Appends the parsed term to *res*; returns the new position or -1.
    """
    j = self.subject(str, i, res)
    if j>= 0:
        return j
    else:
        j = self.skipSpace(str, i)
        if j<0: return -1
        else: i=j

        if str[i]=='"':
            # Long (""") vs short (") string delimiter
            if str[i:i+3] == '"""': delim = '"""'
            else: delim = '"'
            i = i + len(delim)

            j, s = self.strconst(str, i, delim)

            res.append(self._store.newLiteral(s))
            # NOTE(review): `progress` looks like a debug trace writing the
            # parsed constant -- confirm it is a no-op in production use
            progress("New string const ", s, j)
            return j
        else:
            return -1
def uriOf(self, sym):
    """Return the URI string for *sym*.

    Tuples come from the old "--pipe" system and carry the URI in
    slot 1; anything else is returned unchanged.
    """
    # FIX: `types.TupleType` was merely a Python-2 alias of `tuple` and
    # was removed in Python 3; `isinstance(sym, tuple)` is identical in
    # behavior and drops the `types` dependency.
    if isinstance(sym, tuple):
        return sym[1]  # old system for --pipe
    # return sym.uriref() # cwm api
    return sym
def uEscape(self, str, i, startline):
    """Decode the 4 hex digits of a \\uXXXX escape starting at *i*.

    Returns (position_after_digits, decoded_character).  Raises
    BadSyntax if the input ends early or a digit is not hex.
    *startline* is the line number used for error reporting.
    """
    j = i
    value = 0
    for _ in range(4):  # exactly 4 hex digits (was a manual counter loop)
        ch = str[j:j+1].lower()
        j = j + 1
        if ch == "":
            raise BadSyntax(self._thisDoc, startline, str, i,
                "unterminated string literal(3)")
        k = "0123456789abcdef".find(ch)
        if k < 0:
            raise BadSyntax(self._thisDoc, startline, str, i,
                "bad string literal hex escape")
        value = value * 16 + k
    # FIX: `unichr` is Python-2-only; fall back to `chr` on Python 3 so
    # this helper keeps working on both (the module already
    # feature-detects wide builds with unichr elsewhere).
    try:
        return j, unichr(value)
    except NameError:
        return j, chr(value)
startline): + stringType = type('') + j = i + count = 0 + value = '\\U' + while count < 8: # Get 8 more characters + ch = str[j:j+1].lower() + # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 + j = j + 1 + if ch == "": + raise BadSyntax(self._thisDoc, startline, str, i, + "unterminated string literal(3)") + k = "0123456789abcdef".find(ch) + if k < 0: + raise BadSyntax(self._thisDoc, startline, str, i, + "bad string literal hex escape") + value = value + ch + count = count + 1 + + uch = stringType(value).decode('unicode-escape') + return j, uch + +wide_build = True +try: + unichr(0x10000) +except ValueError: + wide_build = False + +# If we are going to do operators then they should generate +# [ is operator:plus of ( \1 \2 ) ] + + +class BadSyntax(SyntaxError): + def __init__(self, uri, lines, str, i, why): + self._str = str.encode('utf-8') # Better go back to strings for errors + self._i = i + self._why = why + self.lines = lines + self._uri = uri + + def __str__(self): + str = self._str + i = self._i + st = 0 + if i>60: + pre="..." + st = i - 60 + else: pre="" + if len(str)-i > 60: post="..." 
def stripCR(str):
    """Return *str* with every carriage-return character removed."""
    # FIX: str.replace runs in a single C-level pass; the previous
    # char-by-char string concatenation was quadratic in len(str).
    return str.replace("\r", "")


def dummyWrite(x):
    """Do-nothing write callback, usable wherever a sink is required."""
    pass


def toBool(s):
    """Map an N3 boolean lexical form to a Python bool.

    'true', 'True' and '1' yield True; 'false', 'False' and '0' yield
    False.  Any other value raises ValueError(s).
    """
    # Membership tests replace the chained `s == ... or s == ...`
    # comparisons; behavior is unchanged.
    if s in ('true', 'True', '1'):
        return True
    if s in ('false', 'False', '0'):
        return False
    raise ValueError(s)
str(self.counter)) + else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b')) + return b + + def newLiteral(self, s, dt, lang): + if dt: return Literal(s, datatype=dt) + else: return Literal(s, lang=lang) + + def newList(self, n, f): + if not n: + return self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil' + ) + + a = self.newBlankNode(f) + first = self.newSymbol( + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first' + ) + rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest') + self.makeStatement((f, first, a, n[0])) + self.makeStatement((f, rest, a, self.newList(n[1:], f))) + return a + + def newSet(self, *args): + return set(args) + + def setDefaultNamespace(self, *args): + return ':'.join(repr(n) for n in args) + + def makeStatement(self, quadruple, why=None): + f, p, s, o = quadruple + + if hasattr(p, 'formula'): + raise Exception("Formula used as predicate") + + s = self.normalise(f, s) + p = self.normalise(f, p) + o = self.normalise(f, o) + + + if f == self.rootFormula: + # print s, p, o, '.' 
+ self.graph.add((s, p, o)) + else: + f.quotedgraph.add((s,p,o)) + + + #return str(quadruple) + + def normalise(self, f, n): + if isinstance(n, tuple): + return URIRef(unicode(n[1])) + + # if isinstance(n, list): + # rdflist, f = n + # name = self.newBlankNode() + # if f == self.rootFormula: + # sublist = name + # for i in xrange(0, len(rdflist) - 1): + # print sublist, 'first', rdflist[i] + # rest = self.newBlankNode() + # print sublist, 'rest', rest + # sublist = rest + # print sublist, 'first', rdflist[-1] + # print sublist, 'rest', 'nil' + # return name + + if isinstance(n, bool): + s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) + return s + + if isinstance(n, int) or isinstance(n, long): + s = Literal(unicode(n), datatype=INTEGER_DATATYPE) + return s + + if isinstance(n, Decimal): + value = str(n.normalize()) + if value == '-0': + value = '0' + s = Literal(value, datatype=DECIMAL_DATATYPE ) + return s + + if isinstance(n, float): + s = Literal(str(n), datatype=DOUBLE_DATATYPE ) + return s + + if f.existentials.has_key(n): + return f.existentials[n] + + # if isinstance(n, Var): + # if f.universals.has_key(n): + # return f.universals[n] + # f.universals[n] = f.newBlankNode() + # return f.universals[n] + + return n + + def intern(self, something): + return something + + def bind(self, pfx, uri): + pass # print pfx, ':', uri + + def startDoc(self, formula): + self.rootFormula = formula + + def endDoc(self, formula): + pass + + +################################################### +# +# Utilities +# + +Escapes = {'a': '\a', + 'b': '\b', + 'f': '\f', + 'r': '\r', + 't': '\t', + 'v': '\v', + 'n': '\n', + '\\': '\\', + '"': '"'} + +forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]') +forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]') +#" +def stringToN3(str, singleLine=0, flags=""): + res = '' + if (len(str) > 20 and + str[-1] <> '"' and + not singleLine and + (str.find("\n") >=0 + or str.find('"') >=0)): + delim= '"""' + forbidden = 
def backslashUify(ustr):
    """Use URL encoding to return an ASCII string corresponding
    to the given unicode"""
    # Build the escaped pieces in a list and join once at the end.
    pieces = []
    for ch in ustr:
        cp = ord(ch)
        if cp > 65535:
            pieces.append("\\U%08X" % cp)   # outside the BMP
        elif cp > 126:
            pieces.append("\\u%04X" % cp)   # non-ASCII BMP character
        else:
            pieces.append("%c" % cp)        # plain ASCII, kept as-is
    return b("".join(pieces))
and delim == '"""' \ + and i+1 < len(str) and str[i+1] != '"': + j=-1 # Single quotes don't need escaping in long format + if j>=0: ch = "\\" + '\\"abfrvtn'[j] + elif ch not in "\n\t" and (ch < " " or ch > "}"): + ch = "[[" + `ch` + "]]" #[2:-1] # Use python + res = res + ch + return delim + res + delim + + +class N3Parser(Parser): + + def __init__(self): + pass + + def parse(self, source, graph, encoding="utf-8"): + # we're currently being handed a Graph, not a ConjunctiveGraph + assert graph.store.context_aware # is this implied by formula_aware + assert graph.store.formula_aware + + if encoding not in [None, "utf-8"]: + raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding) + + conj_graph = ConjunctiveGraph(store=graph.store) + conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg + # TODO: update N3Processor so that it can use conj_graph as the sink + conj_graph.namespace_manager = graph.namespace_manager + sink = RDFSink(conj_graph) + + baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") + p = SinkParser(sink, baseURI=baseURI) + + p.loadStream(source.getByteStream()) + + for prefix, namespace in p._bindings.items(): + conj_graph.bind(prefix, namespace) + + + + +def _test(): + import doctest + doctest.testmod() + + +# if __name__ == '__main__': +# _test() + +def main(): + g=ConjunctiveGraph() + + sink = RDFSink(g) + base = 'file://' + os.path.join(os.getcwd(), sys.argv[1]) + + p = SinkParser(sink, baseURI=base) + p._bindings[''] = p._baseURI + '#' + p.startDoc() + + f = open(sys.argv[1], 'rb') + bytes = f.read() + f.close() + + p.feed(bytes) + p.endDoc() + for t in g.quads((None,None,None)): + + print t + +if __name__ == '__main__': + main() + +#ends + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py new file mode 100644 index 0000000..22d65f8 --- /dev/null +++ 
b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py @@ -0,0 +1,107 @@ +""" +This is a rdflib_ plugin for parsing NQuad files into Conjunctive +graphs that can be used and queried. The store that backs the graph +*must* be able to handle contexts. + +>>> from rdflib_ import ConjunctiveGraph, URIRef, Namespace +>>> g = ConjunctiveGraph() +>>> data = open("test/example.nquads", "rb") +>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS +<Graph identifier=... (<class 'rdflib_.graph.Graph'>)> +>>> assert len(g.store) == 449 +>>> # There should be 16 separate contexts +>>> assert len([x for x in g.store.contexts()]) == 16 +>>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009) +>>> # Looking for: +>>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009> +>>> s = URIRef("http://bibliographica.org/entity/E10009") +>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") +>>> assert(g.value(s, FOAF.name) == "Arco Publications") +""" + +from rdflib_.py3compat import b + +# Build up from the NTriples parser: +from rdflib_.plugins.parsers.ntriples import NTriplesParser +from rdflib_.plugins.parsers.ntriples import ParseError +from rdflib_.plugins.parsers.ntriples import r_tail +from rdflib_.plugins.parsers.ntriples import r_wspace +from rdflib_.plugins.parsers.ntriples import r_wspaces + +__all__ = ['QuadSink', 'NQuadsParser'] + +class QuadSink(object): + def __init__(self): + class FakeStore(object): + def __init__(self, addn): + self.addN = addn + self.length = 0 + self.__quads = [] + self.__store = FakeStore(self.addN) + + def addN(self, quads): + self.length += 1 + self.__quads.append(quads) + + def quads(self, (s,p,o)): + for s,p,o,ctx in self.__quads: + yield s,p,o,ctx + +class NQuadsParser(NTriplesParser): + def __init__(self, sink=None): + if sink is not None: + assert sink.store.context_aware, ("NQuadsParser must be given" 
+ " a context aware store.") + self.sink = sink + else: self.sink = QuadSink() + + def parse(self, inputsource, sink, **kwargs): + """Parse f as an N-Triples file.""" + assert sink.store.context_aware, ("NQuadsParser must be given" + " a context aware store.") + self.sink = sink + + source = inputsource.getByteStream() + + if not hasattr(source, 'read'): + raise ParseError("Item to parse must be a file-like object.") + + self.file = source + self.buffer = '' + while True: + self.line = self.readline() + if self.line is None: break + try: self.parseline() + except ParseError: + raise ParseError("Invalid line: %r" % self.line) + return self.sink + + def context(self): + context = self.uriref() + if not context: + raise ParseError("Context must be a uriref") + return context + + def parseline(self): + self.eat(r_wspace) + if (not self.line) or self.line.startswith(b('#')): + return # The line is empty or a comment + + subject = self.subject() + self.eat(r_wspaces) + + predicate = self.predicate() + self.eat(r_wspaces) + + obj = self.object() + self.eat(r_wspaces) + + context = self.context() + self.eat(r_tail) + + if self.line: + raise ParseError("Trailing garbage") + # Must have a context aware store - add on a normal Graph + # discards anything where the ctx != graph.identifier + self.sink.store.add((subject, predicate, obj), context) + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~ new file mode 100644 index 0000000..fbb4a37 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nquads.py~ @@ -0,0 +1,107 @@ +""" +This is a rdflib plugin for parsing NQuad files into Conjunctive +graphs that can be used and queried. The store that backs the graph +*must* be able to handle contexts. 
+ +>>> from rdflib import ConjunctiveGraph, URIRef, Namespace +>>> g = ConjunctiveGraph() +>>> data = open("test/example.nquads", "rb") +>>> g.parse(data, format="nquads") # doctest:+ELLIPSIS +<Graph identifier=... (<class 'rdflib.graph.Graph'>)> +>>> assert len(g.store) == 449 +>>> # There should be 16 separate contexts +>>> assert len([x for x in g.store.contexts()]) == 16 +>>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009) +>>> # Looking for: +>>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009> +>>> s = URIRef("http://bibliographica.org/entity/E10009") +>>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") +>>> assert(g.value(s, FOAF.name) == "Arco Publications") +""" + +from rdflib.py3compat import b + +# Build up from the NTriples parser: +from rdflib.plugins.parsers.ntriples import NTriplesParser +from rdflib.plugins.parsers.ntriples import ParseError +from rdflib.plugins.parsers.ntriples import r_tail +from rdflib.plugins.parsers.ntriples import r_wspace +from rdflib.plugins.parsers.ntriples import r_wspaces + +__all__ = ['QuadSink', 'NQuadsParser'] + +class QuadSink(object): + def __init__(self): + class FakeStore(object): + def __init__(self, addn): + self.addN = addn + self.length = 0 + self.__quads = [] + self.__store = FakeStore(self.addN) + + def addN(self, quads): + self.length += 1 + self.__quads.append(quads) + + def quads(self, (s,p,o)): + for s,p,o,ctx in self.__quads: + yield s,p,o,ctx + +class NQuadsParser(NTriplesParser): + def __init__(self, sink=None): + if sink is not None: + assert sink.store.context_aware, ("NQuadsParser must be given" + " a context aware store.") + self.sink = sink + else: self.sink = QuadSink() + + def parse(self, inputsource, sink, **kwargs): + """Parse f as an N-Triples file.""" + assert sink.store.context_aware, ("NQuadsParser must be given" + " a context aware store.") + 
self.sink = sink + + source = inputsource.getByteStream() + + if not hasattr(source, 'read'): + raise ParseError("Item to parse must be a file-like object.") + + self.file = source + self.buffer = '' + while True: + self.line = self.readline() + if self.line is None: break + try: self.parseline() + except ParseError: + raise ParseError("Invalid line: %r" % self.line) + return self.sink + + def context(self): + context = self.uriref() + if not context: + raise ParseError("Context must be a uriref") + return context + + def parseline(self): + self.eat(r_wspace) + if (not self.line) or self.line.startswith(b('#')): + return # The line is empty or a comment + + subject = self.subject() + self.eat(r_wspaces) + + predicate = self.predicate() + self.eat(r_wspaces) + + obj = self.object() + self.eat(r_wspaces) + + context = self.context() + self.eat(r_tail) + + if self.line: + raise ParseError("Trailing garbage") + # Must have a context aware store - add on a normal Graph + # discards anything where the ctx != graph.identifier + self.sink.store.add((subject, predicate, obj), context) + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py new file mode 100644 index 0000000..86cec18 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py @@ -0,0 +1,28 @@ +from rdflib_.parser import Parser +from rdflib_.plugins.parsers.ntriples import NTriplesParser + +__all__ = ['NTSink', 'NTParser'] + +class NTSink(object): + def __init__(self, graph): + self.graph = graph + + def triple(self, s, p, o): + self.graph.add((s, p, o)) + + +class NTParser(Parser): + """parser for the ntriples format, often stored with the .nt extension + + See http://www.w3.org/TR/rdf-testcases/#ntriples""" + + def __init__(self): + super(NTParser, self).__init__() + + def parse(self, source, sink, baseURI=None): + f = source.getByteStream() # TODO getCharacterStream? 
+ parser = NTriplesParser(NTSink(sink)) + parser.parse(f) + f.close() + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~ new file mode 100644 index 0000000..1ec2282 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/nt.py~ @@ -0,0 +1,28 @@ +from rdflib.parser import Parser +from rdflib.plugins.parsers.ntriples import NTriplesParser + +__all__ = ['NTSink', 'NTParser'] + +class NTSink(object): + def __init__(self, graph): + self.graph = graph + + def triple(self, s, p, o): + self.graph.add((s, p, o)) + + +class NTParser(Parser): + """parser for the ntriples format, often stored with the .nt extension + + See http://www.w3.org/TR/rdf-testcases/#ntriples""" + + def __init__(self): + super(NTParser, self).__init__() + + def parse(self, source, sink, baseURI=None): + f = source.getByteStream() # TODO getCharacterStream? + parser = NTriplesParser(NTSink(sink)) + parser.parse(f) + f.close() + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py new file mode 100644 index 0000000..36a263e --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python +__doc__=""" +N-Triples Parser +License: GPL 2, W3C, BSD, or MIT +Author: Sean B. 
class Sink(object):
    """Default triple sink: counts the triples it receives and echoes
    each one to stdout."""

    def __init__(self):
        # number of triples seen so far
        self.length = 0

    def triple(self, s, p, o):
        self.length = self.length + 1
        print (s, p, o)
% s[:10]) + else: raise ParseError("Illegal literal character: %r" % s[0]) + return u''.join(result) + +r_hibyte = re.compile(ur'([\x80-\xFF])') + +def uriquote(uri): + if not validate: + return uri + else: + return r_hibyte.sub( + lambda m: '%%%02X' % ord(m.group(1)), uri) + +class NTriplesParser(object): + """An N-Triples Parser. + + Usage:: + + p = NTriplesParser(sink=MySink()) + sink = p.parse(f) # file; use parsestring for a string + """ + + def __init__(self, sink=None): + if sink is not None: + self.sink = sink + else: self.sink = Sink() + + def parse(self, f): + """Parse f as an N-Triples file.""" + if not hasattr(f, 'read'): + raise ParseError("Item to parse must be a file-like object.") + + self.file = f + self.buffer = '' + while True: + self.line = self.readline() + if self.line is None: break + try: self.parseline() + except ParseError: + raise ParseError("Invalid line: %r" % self.line) + return self.sink + + def parsestring(self, s): + """Parse s as an N-Triples string.""" + if not isinstance(s, basestring): + raise ParseError("Item to parse must be a string instance.") + try: + from io import BytesIO + except ImportError: + from cStringIO import StringIO as BytesIO + f = BytesIO() + f.write(cast_bytes(s)) + f.seek(0) + self.parse(f) + + def readline(self): + """Read an N-Triples line from buffered input.""" + # N-Triples lines end in either CRLF, CR, or LF + # Therefore, we can't just use f.readline() + if not self.buffer: + buffer = self.file.read(bufsiz) + if not buffer: return None + self.buffer = buffer + + while True: + m = r_line.match(self.buffer) + if m: # the more likely prospect + self.buffer = self.buffer[m.end():] + return m.group(1) + else: + buffer = self.file.read(bufsiz) + if not buffer and not self.buffer.isspace(): + raise ParseError("EOF in line") + elif not buffer: + return None + self.buffer += buffer + + def parseline(self): + self.eat(r_wspace) + if (not self.line) or self.line.startswith(b('#')): + return # The line is empty 
or a comment + + subject = self.subject() + self.eat(r_wspaces) + + predicate = self.predicate() + self.eat(r_wspaces) + + object = self.object() + self.eat(r_tail) + + if self.line: + raise ParseError("Trailing garbage") + self.sink.triple(subject, predicate, object) + + def peek(self, token): + return self.line.startswith(token) + + def eat(self, pattern): + m = pattern.match(self.line) + if not m: # @@ Why can't we get the original pattern? + print(dir(pattern)) + print repr(self.line), type(self.line) + raise ParseError("Failed to eat %s" % pattern) + self.line = self.line[m.end():] + return m + + def subject(self): + # @@ Consider using dictionary cases + subj = self.uriref() or self.nodeid() + if not subj: + raise ParseError("Subject must be uriref or nodeID") + return subj + + def predicate(self): + pred = self.uriref() + if not pred: + raise ParseError("Predicate must be uriref") + return pred + + def object(self): + objt = self.uriref() or self.nodeid() or self.literal() + if objt is False: + raise ParseError("Unrecognised object type") + return objt + + def uriref(self): + if self.peek(b('<')): + uri = self.eat(r_uriref).group(1) + uri = unquote(uri) + uri = uriquote(uri) + return URI(uri) + return False + + def nodeid(self): + if self.peek(b('_')): + return bNode(self.eat(r_nodeid).group(1).decode()) + return False + + def literal(self): + if self.peek(b('"')): + lit, lang, dtype = self.eat(r_literal).groups() + if lang: + lang = lang.decode() + else: + lang = None + if dtype: + dtype = dtype.decode() + else: + dtype = None + if lang and dtype: + raise ParseError("Can't have both a language and a datatype") + lit = unquote(lit) + return Literal(lit, lang, dtype) + return False + +# # Obsolete, unused +# def parseURI(uri): +# import urllib +# parser = NTriplesParser() +# u = urllib.urlopen(uri) +# sink = parser.parse(u) +# u.close() +# # for triple in sink: +# # print triple +# print 'Length of input:', sink.length + diff --git 
a/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~ new file mode 100644 index 0000000..48fe327 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/ntriples.py~ @@ -0,0 +1,243 @@ +#!/usr/bin/env python +__doc__=""" +N-Triples Parser +License: GPL 2, W3C, BSD, or MIT +Author: Sean B. Palmer, inamidst.com +""" + +import re +from rdflib.term import URIRef as URI +from rdflib.term import BNode as bNode +from rdflib.term import Literal + +from rdflib.py3compat import b, cast_bytes + +__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser'] + +uriref = b(r'<([^:]+:[^\s"<>]+)>') +literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"') +litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?') + +r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)')) +r_wspace = re.compile(b(r'[ \t]*')) +r_wspaces = re.compile(b(r'[ \t]+')) +r_tail = re.compile(b(r'[ \t]*\.[ \t]*')) +r_uriref = re.compile(uriref) +r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)')) +r_literal = re.compile(literal + litinfo) + +bufsiz = 2048 +validate = False + +class Node(unicode): pass + +class ParseError(Exception): pass + +class Sink(object): + def __init__(self): + self.length = 0 + + def triple(self, s, p, o): + self.length += 1 + print (s, p, o) + +quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'} +r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')) +r_quot = re.compile(b(r'\\(t|n|r|"|\\)')) +r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')) + +def unquote(s): + """Unquote an N-Triples string.""" + if not validate: + return s.decode('unicode-escape') + else: + result = [] + while s: + m = r_safe.match(s) + if m: + s = s[m.end():] + result.append(m.group(1).decode('ascii')) + continue + + m = r_quot.match(s) + if m: + s = s[2:] + result.append(quot[m.group(1)]) + continue + + m = r_uniquot.match(s) + if m: + s = s[m.end():] + u, U = m.groups() + 
codepoint = int(u or U, 16) + if codepoint > 0x10FFFF: + raise ParseError("Disallowed codepoint: %08X" % codepoint) + result.append(unichr(codepoint)) + elif s.startswith(b('\\')): + raise ParseError("Illegal escape at: %s..." % s[:10]) + else: raise ParseError("Illegal literal character: %r" % s[0]) + return u''.join(result) + +r_hibyte = re.compile(ur'([\x80-\xFF])') + +def uriquote(uri): + if not validate: + return uri + else: + return r_hibyte.sub( + lambda m: '%%%02X' % ord(m.group(1)), uri) + +class NTriplesParser(object): + """An N-Triples Parser. + + Usage:: + + p = NTriplesParser(sink=MySink()) + sink = p.parse(f) # file; use parsestring for a string + """ + + def __init__(self, sink=None): + if sink is not None: + self.sink = sink + else: self.sink = Sink() + + def parse(self, f): + """Parse f as an N-Triples file.""" + if not hasattr(f, 'read'): + raise ParseError("Item to parse must be a file-like object.") + + self.file = f + self.buffer = '' + while True: + self.line = self.readline() + if self.line is None: break + try: self.parseline() + except ParseError: + raise ParseError("Invalid line: %r" % self.line) + return self.sink + + def parsestring(self, s): + """Parse s as an N-Triples string.""" + if not isinstance(s, basestring): + raise ParseError("Item to parse must be a string instance.") + try: + from io import BytesIO + except ImportError: + from cStringIO import StringIO as BytesIO + f = BytesIO() + f.write(cast_bytes(s)) + f.seek(0) + self.parse(f) + + def readline(self): + """Read an N-Triples line from buffered input.""" + # N-Triples lines end in either CRLF, CR, or LF + # Therefore, we can't just use f.readline() + if not self.buffer: + buffer = self.file.read(bufsiz) + if not buffer: return None + self.buffer = buffer + + while True: + m = r_line.match(self.buffer) + if m: # the more likely prospect + self.buffer = self.buffer[m.end():] + return m.group(1) + else: + buffer = self.file.read(bufsiz) + if not buffer and not 
self.buffer.isspace(): + raise ParseError("EOF in line") + elif not buffer: + return None + self.buffer += buffer + + def parseline(self): + self.eat(r_wspace) + if (not self.line) or self.line.startswith(b('#')): + return # The line is empty or a comment + + subject = self.subject() + self.eat(r_wspaces) + + predicate = self.predicate() + self.eat(r_wspaces) + + object = self.object() + self.eat(r_tail) + + if self.line: + raise ParseError("Trailing garbage") + self.sink.triple(subject, predicate, object) + + def peek(self, token): + return self.line.startswith(token) + + def eat(self, pattern): + m = pattern.match(self.line) + if not m: # @@ Why can't we get the original pattern? + print(dir(pattern)) + print repr(self.line), type(self.line) + raise ParseError("Failed to eat %s" % pattern) + self.line = self.line[m.end():] + return m + + def subject(self): + # @@ Consider using dictionary cases + subj = self.uriref() or self.nodeid() + if not subj: + raise ParseError("Subject must be uriref or nodeID") + return subj + + def predicate(self): + pred = self.uriref() + if not pred: + raise ParseError("Predicate must be uriref") + return pred + + def object(self): + objt = self.uriref() or self.nodeid() or self.literal() + if objt is False: + raise ParseError("Unrecognised object type") + return objt + + def uriref(self): + if self.peek(b('<')): + uri = self.eat(r_uriref).group(1) + uri = unquote(uri) + uri = uriquote(uri) + return URI(uri) + return False + + def nodeid(self): + if self.peek(b('_')): + return bNode(self.eat(r_nodeid).group(1).decode()) + return False + + def literal(self): + if self.peek(b('"')): + lit, lang, dtype = self.eat(r_literal).groups() + if lang: + lang = lang.decode() + else: + lang = None + if dtype: + dtype = dtype.decode() + else: + dtype = None + if lang and dtype: + raise ParseError("Can't have both a language and a datatype") + lit = unquote(lit) + return Literal(lit, lang, dtype) + return False + +# # Obsolete, unused +# def 
parseURI(uri): +# import urllib +# parser = NTriplesParser() +# u = urllib.urlopen(uri) +# sink = parser.parse(u) +# u.close() +# # for triple in sink: +# # print triple +# print 'Length of input:', sink.length + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py new file mode 100644 index 0000000..9c4f4eb --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py @@ -0,0 +1,168 @@ +""" +From a Python file, expecting an RDF/XML pretty printed output:: + + import rdflib_.graph as g + graph = g.Graph() + graph.parse('filename.html', format='rdfa') + print graph.serialize(format='pretty-xml') + +For details on RDFa, the reader should consult the `RDFa syntax document`__. + +This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman + +.. __: http://www.w3.org/TR/rdfa-syntax +.. __: http://www.w3.org/2007/08/pyRdfa/ + +""" + + +import sys +import urllib +import xml.dom.minidom + +from rdflib_.term import URIRef +from rdflib_.parser import Parser +from rdflib_.plugins.parsers.rdfa.state import ExecutionContext +from rdflib_.plugins.parsers.rdfa.parse import parse_one_node +from rdflib_.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph, + DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA) + +from rdflib_.plugins.parsers.rdfa.transform.headabout import head_about_transform + +__all__ = ['RDFaParser'] + +# These are part of the RDFa spec. +BUILT_IN_TRANSFORMERS = [ + head_about_transform +] + +# Exception handling. Essentially, all the different exceptions are re-packaged +# into separate exception class, to allow for an easier management on the user +# level +class RDFaError(Exception) : + """Just a wrapper around the local exceptions. 
It does not add any new + functionality to the Exception class.""" + pass + +# For some doctype and element name combinations an automatic switch to an +# input mode is done +_HOST_LANG = { + ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA, + ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML +} + + +class RDFaParser(Parser): + + def parse(self, source, sink, + warnings=False, space_preserve=True, + transformers=None, xhtml=True, lax=True, html5=False, encoding=None): + if transformers is None: + transformers = [] + options = Options(warnings, space_preserve, transformers, xhtml, lax) + baseURI = source.getPublicId() + stream = source.getByteStream() + if html5: + dom = _process_html5_source(stream, options, encoding) + else: + dom = _try_process_source(stream, options, encoding) + _process_DOM(dom, baseURI, sink, options) + + +def _process_DOM(dom, base, graph, options=None): + """ + Core processing. The transformers ("pre-processing") is done on the DOM + tree, the state is initialized, and the "real" RDFa parsing is done. + The result is put into the provided Graph. + + The real work is done in the parser function ``parse_one_node()``. + + Params: + dom -- XML DOM Tree node (for the top level) + base -- URI for the default "base" value (usually the URI of the file to be processed) + + Options: + obj -- `Options` for the distiller + raise RDFaError -- when called via CGI, this encapsulates the possible + exceptions raised by the RDFLib serializer or the processing itself + """ + html = dom.documentElement + # Perform the built-in and external transformations on the HTML tree. This is, + # in simulated form, the hGRDDL approach of Ben Adida. + for trans in options.transformers + BUILT_IN_TRANSFORMERS: + trans(html, options) + # Collect the initial state. This takes care of things + # like base, top level namespace settings, etc. + # Ensure the proper initialization. 
+ state = ExecutionContext(html, graph, base=base, options=options) + # The top level subject starts with the current document; this + # is used by the recursion + subject = URIRef(state.base) + # Parse the whole thing recursively and fill the graph. + parse_one_node(html, graph, subject, state, []) + if options.comment_graph.graph != None: + # Add the content of the comment graph to the output. + graph.bind("dist", DIST_NS) + for t in options.comment_graph.graph: + graph.add(t) + +def _try_process_source(stream, options, encoding): + """ + Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options + while figuring out input.. + + Returns a DOM tree. + """ + parse = xml.dom.minidom.parse + try: + dom = parse(stream) + # Try to second-guess the input type + # This is _not_ really kosher, but the minidom is not really namespace aware... + # In practice the goal is to have the system recognize svg content automatically + # First see if there is a default namespace defined for the document: + top = dom.documentElement + if top.hasAttribute("xmlns"): + key = (top.getAttribute("xmlns"), top.nodeName) + if key in _HOST_LANG: + options.host_language = _HOST_LANG[key] + return dom + except: + # XML Parsing error in the input + type, value, traceback = sys.exc_info() + if options.host_language == GENERIC_XML or options.lax == False: + raise RDFaError('Parsing error in input file: "%s"' % value) + + # XML Parsing error in the input + msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value + if options != None and options.warnings: + options.comment_graph.add_warning(msg) + + # in Ivan's original code he reopened the stream if it was from urllib + if isinstance(stream, urllib.addinfourl): + stream = urllib.urlopen(stream.url) + + return _process_html5_source(stream, options, encoding) + + +def _process_html5_source(stream, options, encoding): + # Now try to see if and HTML5 parser is an alternative... 
+ try: + from html5lib import HTMLParser, treebuilders + except ImportError: + # no alternative to the XHTML error, because HTML5 parser not available... + msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>' + raise RDFaError(msg2) + + parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom")) + parse = parser.parse + try: + dom = parse(stream, encoding) + # The host language has changed + options.host_language = HTML5_RDFA + except: + # Well, even the HTML5 parser could not do anything with this... + (type, value, traceback) = sys.exc_info() + msg2 = 'Parsing error in input file as HTML5: "%s"' % value + raise RDFaError, msg2 + + return dom diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~ new file mode 100644 index 0000000..9553349 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/__init__.py~ @@ -0,0 +1,168 @@ +""" +From a Python file, expecting an RDF/XML pretty printed output:: + + import rdflib.graph as g + graph = g.Graph() + graph.parse('filename.html', format='rdfa') + print graph.serialize(format='pretty-xml') + +For details on RDFa, the reader should consult the `RDFa syntax document`__. + +This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman + +.. __: http://www.w3.org/TR/rdfa-syntax +.. 
__: http://www.w3.org/2007/08/pyRdfa/ + +""" + + +import sys +import urllib +import xml.dom.minidom + +from rdflib.term import URIRef +from rdflib.parser import Parser +from rdflib.plugins.parsers.rdfa.state import ExecutionContext +from rdflib.plugins.parsers.rdfa.parse import parse_one_node +from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph, + DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA) + +from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform + +__all__ = ['RDFaParser'] + +# These are part of the RDFa spec. +BUILT_IN_TRANSFORMERS = [ + head_about_transform +] + +# Exception handling. Essentially, all the different exceptions are re-packaged +# into separate exception class, to allow for an easier management on the user +# level +class RDFaError(Exception) : + """Just a wrapper around the local exceptions. It does not add any new + functionality to the Exception class.""" + pass + +# For some doctype and element name combinations an automatic switch to an +# input mode is done +_HOST_LANG = { + ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA, + ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML +} + + +class RDFaParser(Parser): + + def parse(self, source, sink, + warnings=False, space_preserve=True, + transformers=None, xhtml=True, lax=True, html5=False, encoding=None): + if transformers is None: + transformers = [] + options = Options(warnings, space_preserve, transformers, xhtml, lax) + baseURI = source.getPublicId() + stream = source.getByteStream() + if html5: + dom = _process_html5_source(stream, options, encoding) + else: + dom = _try_process_source(stream, options, encoding) + _process_DOM(dom, baseURI, sink, options) + + +def _process_DOM(dom, base, graph, options=None): + """ + Core processing. The transformers ("pre-processing") is done on the DOM + tree, the state is initialized, and the "real" RDFa parsing is done. + The result is put into the provided Graph. 
+ + The real work is done in the parser function ``parse_one_node()``. + + Params: + dom -- XML DOM Tree node (for the top level) + base -- URI for the default "base" value (usually the URI of the file to be processed) + + Options: + obj -- `Options` for the distiller + raise RDFaError -- when called via CGI, this encapsulates the possible + exceptions raised by the RDFLib serializer or the processing itself + """ + html = dom.documentElement + # Perform the built-in and external transformations on the HTML tree. This is, + # in simulated form, the hGRDDL approach of Ben Adida. + for trans in options.transformers + BUILT_IN_TRANSFORMERS: + trans(html, options) + # Collect the initial state. This takes care of things + # like base, top level namespace settings, etc. + # Ensure the proper initialization. + state = ExecutionContext(html, graph, base=base, options=options) + # The top level subject starts with the current document; this + # is used by the recursion + subject = URIRef(state.base) + # Parse the whole thing recursively and fill the graph. + parse_one_node(html, graph, subject, state, []) + if options.comment_graph.graph != None: + # Add the content of the comment graph to the output. + graph.bind("dist", DIST_NS) + for t in options.comment_graph.graph: + graph.add(t) + +def _try_process_source(stream, options, encoding): + """ + Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options + while figuring out input.. + + Returns a DOM tree. + """ + parse = xml.dom.minidom.parse + try: + dom = parse(stream) + # Try to second-guess the input type + # This is _not_ really kosher, but the minidom is not really namespace aware... 
+ # In practice the goal is to have the system recognize svg content automatically + # First see if there is a default namespace defined for the document: + top = dom.documentElement + if top.hasAttribute("xmlns"): + key = (top.getAttribute("xmlns"), top.nodeName) + if key in _HOST_LANG: + options.host_language = _HOST_LANG[key] + return dom + except: + # XML Parsing error in the input + type, value, traceback = sys.exc_info() + if options.host_language == GENERIC_XML or options.lax == False: + raise RDFaError('Parsing error in input file: "%s"' % value) + + # XML Parsing error in the input + msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value + if options != None and options.warnings: + options.comment_graph.add_warning(msg) + + # in Ivan's original code he reopened the stream if it was from urllib + if isinstance(stream, urllib.addinfourl): + stream = urllib.urlopen(stream.url) + + return _process_html5_source(stream, options, encoding) + + +def _process_html5_source(stream, options, encoding): + # Now try to see if and HTML5 parser is an alternative... + try: + from html5lib import HTMLParser, treebuilders + except ImportError: + # no alternative to the XHTML error, because HTML5 parser not available... + msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>' + raise RDFaError(msg2) + + parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom")) + parse = parser.parse + try: + dom = parse(stream, encoding) + # The host language has changed + options.host_language = HTML5_RDFA + except: + # Well, even the HTML5 parser could not do anything with this... 
+ (type, value, traceback) = sys.exc_info() + msg2 = 'Parsing error in input file as HTML5: "%s"' % value + raise RDFaError, msg2 + + return dom diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py new file mode 100644 index 0000000..4a9b015 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example +by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}. + +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +@contact: Ivan Herman, ivan@w3.org +""" + +from StringIO import StringIO + +__all__ = ['handle_embeddedRDF'] + +def handle_embeddedRDF(node, graph, state): + """ + Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate + base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing. + @param node: a DOM node for the top level xml element + @param graph: target rdf graph + @type graph: RDFLib's Graph object instance + @param state: the inherited state (namespaces, lang, etc) + @type state: L{State.ExecutionContext} + @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. 
+ @rtype: Boolean + + """ + if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#": + node.setAttribute("xml:base",state.base) + rdf = StringIO(node.toxml()) + graph.parse(rdf) + return True + else: + return False + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~ new file mode 100644 index 0000000..4a9b015 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/embeddedrdf.py~ @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example +by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}. + +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +@contact: Ivan Herman, ivan@w3.org +""" + +from StringIO import StringIO + +__all__ = ['handle_embeddedRDF'] + +def handle_embeddedRDF(node, graph, state): + """ + Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate + base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing. + @param node: a DOM node for the top level xml element + @param graph: target rdf graph + @type graph: RDFLib's Graph object instance + @param state: the inherited state (namespaces, lang, etc) + @type state: L{State.ExecutionContext} + @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. 
+ @rtype: Boolean + + """ + if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#": + node.setAttribute("xml:base",state.base) + rdf = StringIO(node.toxml()) + graph.parse(rdf) + return True + else: + return False + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py new file mode 100644 index 0000000..ed185af --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +""" +Implementation of the Literal handling. Details of the algorithm are described on +U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. + +@summary: RDFa Literal generation +@requires: U{RDFLib package<http://rdflib_.net>} +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +""" + +import re +from rdflib_.namespace import RDF +from rdflib_.term import Literal + +__all__ = ['generate_literal'] + +XMLLiteral = RDF.XMLLiteral + + +def __putBackEntities(str): + """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string. + Used by XML Literal + @param str: string to be converted + @return: string with entities + @rtype: string + """ + return str.replace('&', '&').replace('<', '<').replace('>', '>') + +#### The real meat... +def generate_literal(node, graph, subject, state): + """Generate the literal the C{@property}, taking into account datatype, etc. + Note: this method is called only if the C{@property} is indeed present, no need to check. + + This method is an encoding of the algorithm documented + U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. 
+ + The method returns a value whether the literal is a 'normal' literal (regardless of its datatype) + or an XML Literal. The return value is True or False, respectively. This value is used to control whether + the parser should stop recursion. This also means that that if the literal is generated from @content, + the return value is False, regardless of the possible @datatype value. + + @param node: DOM element node + @param graph: the (RDF) graph to add the properies to + @param subject: the RDFLib URIRef serving as a subject for the generated triples + @param state: the current state to be used for the CURIE-s + @type state: L{State.ExecutionContext} + @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value. + @rtype: Boolean + """ + def _get_literal(Pnode): + """ + Get (recursively) the full text from a DOM Node. + + @param Pnode: DOM Node + @return: string + """ + rc = "" + for node in Pnode.childNodes: + if node.nodeType == node.TEXT_NODE: + rc = rc + node.data + elif node.nodeType == node.ELEMENT_NODE: + rc = rc + _get_literal(node) + + # The decision of the group in February 2008 is not to normalize the result by default. + # This is reflected in the default value of the option + if state.options.space_preserve: + return rc + else: + return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() + # end getLiteral + + def _get_XML_literal(Pnode): + """ + Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done + via a C{node.toxml} call of the xml minidom implementation.) 
+ + @param Pnode: DOM Node + @return: string + """ + def collectPrefixes(prefixes, node): + def addPf(prefx, string): + pf = string.split(':')[0] + if pf != string and pf not in prefx : prefx.append(pf) + # edn addPf + + # first the local name of the node + addPf(prefixes, node.tagName) + # get all the attributes and children + for child in node.childNodes: + if child.nodeType == node.ELEMENT_NODE: + collectPrefixes(prefixes, child) + elif child.nodeType == node.ATTRIBUTE_NODE: + addPf(prefixes, node.child.name) + # end collectPrefixes + + rc = "" + prefixes = [] + for node in Pnode.childNodes: + if node.nodeType == node.ELEMENT_NODE: + collectPrefixes(prefixes, node) + + for node in Pnode.childNodes: + if node.nodeType == node.TEXT_NODE: + rc = rc + __putBackEntities(node.data) + elif node.nodeType == node.ELEMENT_NODE: + # Decorate the element with namespaces and lang values + for prefix in prefixes: + if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix): + node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix]) + # Set the default namespace, if not done (and is available) + if not node.getAttribute("xmlns") and state.defaultNS != None: + node.setAttribute("xmlns", state.defaultNS) + # Get the lang, if necessary + if not node.getAttribute("xml:lang") and state.lang != None: + node.setAttribute("xml:lang", state.lang) + rc = rc + node.toxml() + return rc + # If XML Literals must be canonicalized for space, then this is the return line: + #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() + # end getXMLLiteral + + # Most of the times the literal is a 'normal' one, ie, not an XML Literal + retval = True + + # Get the Property URI-s + props = state.get_resources(node.getAttribute("property"), prop=True) + + # Get, if exists, the value of @datatype, and figure out the language + datatype = None + dtset = False + lang = state.lang + if node.hasAttribute("datatype"): + dtset = True + dt = node.getAttribute("datatype") + if dt != "": + datatype = 
state.get_resource(dt) + lang = None + + # The simple case: separate @content attribute + if node.hasAttribute("content"): + val = node.getAttribute("content") + object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang) + # The value of datatype has been set, and the keyword paramaters take care of the rest + else: + # see if there *is* a datatype (even if it is empty!) + if dtset: + # yep. The Literal content is the pure text part of the current element: + # We have to check whether the specified datatype is, in fact, and + # explicit XML Literal + if datatype == XMLLiteral: + object = Literal(_get_XML_literal(node), datatype=XMLLiteral) + retval = False + else: + object = Literal(_get_literal(node), datatype=datatype, lang=lang) + else: + # no controlling @datatype. We have to see if there is markup in the contained + # element + if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]: + # yep, and XML Literal should be generated + object = Literal(_get_XML_literal(node), datatype=XMLLiteral) + retval = False + else: + val = _get_literal(node) + # At this point, there might be entities in the string that are returned as real characters by the dom + # implementation. That should be turned back + object = Literal(_get_literal(node), lang=lang) + + # NOTE: rdflib_<2.5 didn't equal Literal with lang="", hence this check + # proably always passed? + # All tests pass with this check removed; going with that.. + ## The object may be empty, for example in an ill-defined <meta> element... 
+ if True:#object != "": + for prop in props: + graph.add((subject, prop, object)) + + return retval + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~ new file mode 100644 index 0000000..2ab9b44 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/literal.py~ @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +""" +Implementation of the Literal handling. Details of the algorithm are described on +U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. + +@summary: RDFa Literal generation +@requires: U{RDFLib package<http://rdflib.net>} +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +""" + +import re +from rdflib.namespace import RDF +from rdflib.term import Literal + +__all__ = ['generate_literal'] + +XMLLiteral = RDF.XMLLiteral + + +def __putBackEntities(str): + """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string. + Used by XML Literal + @param str: string to be converted + @return: string with entities + @rtype: string + """ + return str.replace('&', '&').replace('<', '<').replace('>', '>') + +#### The real meat... +def generate_literal(node, graph, subject, state): + """Generate the literal the C{@property}, taking into account datatype, etc. + Note: this method is called only if the C{@property} is indeed present, no need to check. + + This method is an encoding of the algorithm documented + U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. + + The method returns a value whether the literal is a 'normal' literal (regardless of its datatype) + or an XML Literal. 
The return value is True or False, respectively. This value is used to control whether + the parser should stop recursion. This also means that that if the literal is generated from @content, + the return value is False, regardless of the possible @datatype value. + + @param node: DOM element node + @param graph: the (RDF) graph to add the properies to + @param subject: the RDFLib URIRef serving as a subject for the generated triples + @param state: the current state to be used for the CURIE-s + @type state: L{State.ExecutionContext} + @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value. + @rtype: Boolean + """ + def _get_literal(Pnode): + """ + Get (recursively) the full text from a DOM Node. + + @param Pnode: DOM Node + @return: string + """ + rc = "" + for node in Pnode.childNodes: + if node.nodeType == node.TEXT_NODE: + rc = rc + node.data + elif node.nodeType == node.ELEMENT_NODE: + rc = rc + _get_literal(node) + + # The decision of the group in February 2008 is not to normalize the result by default. + # This is reflected in the default value of the option + if state.options.space_preserve: + return rc + else: + return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() + # end getLiteral + + def _get_XML_literal(Pnode): + """ + Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done + via a C{node.toxml} call of the xml minidom implementation.) 
+ + @param Pnode: DOM Node + @return: string + """ + def collectPrefixes(prefixes, node): + def addPf(prefx, string): + pf = string.split(':')[0] + if pf != string and pf not in prefx : prefx.append(pf) + # edn addPf + + # first the local name of the node + addPf(prefixes, node.tagName) + # get all the attributes and children + for child in node.childNodes: + if child.nodeType == node.ELEMENT_NODE: + collectPrefixes(prefixes, child) + elif child.nodeType == node.ATTRIBUTE_NODE: + addPf(prefixes, node.child.name) + # end collectPrefixes + + rc = "" + prefixes = [] + for node in Pnode.childNodes: + if node.nodeType == node.ELEMENT_NODE: + collectPrefixes(prefixes, node) + + for node in Pnode.childNodes: + if node.nodeType == node.TEXT_NODE: + rc = rc + __putBackEntities(node.data) + elif node.nodeType == node.ELEMENT_NODE: + # Decorate the element with namespaces and lang values + for prefix in prefixes: + if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix): + node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix]) + # Set the default namespace, if not done (and is available) + if not node.getAttribute("xmlns") and state.defaultNS != None: + node.setAttribute("xmlns", state.defaultNS) + # Get the lang, if necessary + if not node.getAttribute("xml:lang") and state.lang != None: + node.setAttribute("xml:lang", state.lang) + rc = rc + node.toxml() + return rc + # If XML Literals must be canonicalized for space, then this is the return line: + #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() + # end getXMLLiteral + + # Most of the times the literal is a 'normal' one, ie, not an XML Literal + retval = True + + # Get the Property URI-s + props = state.get_resources(node.getAttribute("property"), prop=True) + + # Get, if exists, the value of @datatype, and figure out the language + datatype = None + dtset = False + lang = state.lang + if node.hasAttribute("datatype"): + dtset = True + dt = node.getAttribute("datatype") + if dt != "": + datatype = 
state.get_resource(dt) + lang = None + + # The simple case: separate @content attribute + if node.hasAttribute("content"): + val = node.getAttribute("content") + object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang) + # The value of datatype has been set, and the keyword paramaters take care of the rest + else: + # see if there *is* a datatype (even if it is empty!) + if dtset: + # yep. The Literal content is the pure text part of the current element: + # We have to check whether the specified datatype is, in fact, and + # explicit XML Literal + if datatype == XMLLiteral: + object = Literal(_get_XML_literal(node), datatype=XMLLiteral) + retval = False + else: + object = Literal(_get_literal(node), datatype=datatype, lang=lang) + else: + # no controlling @datatype. We have to see if there is markup in the contained + # element + if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]: + # yep, and XML Literal should be generated + object = Literal(_get_XML_literal(node), datatype=XMLLiteral) + retval = False + else: + val = _get_literal(node) + # At this point, there might be entities in the string that are returned as real characters by the dom + # implementation. That should be turned back + object = Literal(_get_literal(node), lang=lang) + + # NOTE: rdflib<2.5 didn't equal Literal with lang="", hence this check + # proably always passed? + # All tests pass with this check removed; going with that.. + ## The object may be empty, for example in an ill-defined <meta> element... 
+ if True:#object != "": + for prop in props: + graph.add((subject, prop, object)) + + return retval + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py new file mode 100644 index 0000000..05abe3e --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +""" + +Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and +handling of the extra Graph for warnings, informations, errors. + + +@summary: RDFa parser (distiller) +@requires: U{RDFLib<http://rdflib_.net>} +@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing; note possible dependecies on Python's version on the project's web site +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} + +""" + +import sys +from rdflib_.graph import Graph +from rdflib_.term import BNode, Literal, URIRef +from rdflib_.namespace import Namespace + +__all__ = ['CommentGraph', 'Options'] + +DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller" +DIST_NS = DIST_URI + '#' + +ns_errors = Namespace(DIST_NS) +distillerURI = URIRef(DIST_URI) + +WARNING = 'warning' +ERROR = 'error' +INFO = 'info' +DEBUG = 'debug' + +_message_properties = { + WARNING: ns_errors["warning"], + ERROR: ns_errors["error"], + INFO: ns_errors["information"], + DEBUG: ns_errors["debug"] +} + +def _add_to_comment_graph(graph, msg, prop, uri): + """ + Add a distiller message to the graph. 
+ + @param graph: RDFLib Graph + @param msg: message of an exception + @type msg: RDFLIb Literal + @param prop: the property to be used + @type prop: string, must be one of 'warning', 'error', 'info', 'debug' + @param uri: the top URI used to invoke the distiller + @type uri: URIRef + """ + bnode = BNode() + graph.add((distillerURI, _message_properties[prop], bnode)) + graph.add((bnode, ns_errors["onURI"], uri)) + graph.add((bnode, ns_errors["message"], msg)) + + +class CommentGraph(object): + """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings, + error messages, and informational messages. + """ + def __init__(self, warnings = False): + """ + @param warnings: whether a graph should effectively be set up, or whether this + should just be an empty shell for the various calls to work (without effect) + """ + if warnings: + self.graph = Graph() + else: + self.graph = None + self.accumulated_literals = [] + self.baseURI = None + + def _add_triple(self, msg, prop): + obj = Literal(msg) + if self.baseURI == None: + self.accumulated_literals.append((obj,prop)) + elif self.graph != None: + _add_to_comment_graph(self.graph, obj, prop, self.baseURI) + + def set_base_URI(self, URI): + """Set the base URI for the comment triples. + + Note that this method I{must} be called at some point to complete the triples. Without it the triples + added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph. + + @param URI: URIRef for the subject of the comments + """ + self.baseURI = URI + if self.graph != None: + for obj, prop in self.accumulated_literals: + _add_to_comment_graph(self.graph, obj, prop, self.baseURI) + self.accumulated_literals = [] + + def add_warning(self, txt): + """Add a warning. A comment triplet is added to the separate "warning" graph. + @param txt: the warning text. 
It will be preceded by the string "==== pyRdfa Warning ==== " + """ + self._add_triple(txt, WARNING) + + def add_info(self, txt): + """Add an informational comment. A comment triplet is added to the separate "warning" graph. + @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== " + """ + self._add_triple(txt, INFO) + + def add_error(self, txt): + """Add an error comment. A comment triplet is added to the separate "warning" graph. + @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== " + """ + self._add_triple(txt, ERROR) + + def _add_debug(self, txt): + self._add_triple(txt, DEBUG) + + +GENERIC_XML = 0 +XHTML_RDFA = 1 +HTML5_RDFA = 2 + +class Options(object): + """Settable options. An instance of this class is stored in + the L{execution context<ExecutionContext>} of the parser. + + @ivar space_preserve: whether plain literals should preserve spaces at output or not + @type space_preserve: Boolean + @ivar comment_graph: Graph for the storage of warnings + @type comment_graph: L{CommentGraph} + @ivar warnings: whether warnings should be generated or not + @type warnings: Boolean + @ivar transformers: extra transformers + @type transformers: list + @type host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA + @ivar host_language: integer (logically: an enumeration) + @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. 
This means that the value of the host language might change run time + @type lax: Boolean + """ + def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False): + """ + @param space_preserve: whether plain literals should preserve spaces at output or not + @type space_preserve: Boolean + @param warnings: whether warnings should be generated or not + @type warnings: Boolean + @param transformers: extra transformers + @type transformers: list + @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set ot HTML5_RDFA, depending on the value of the lax flag and the result of parsing. + @type xhtml: Booelan + @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time + @type lax: Boolean + """ + self.space_preserve = space_preserve + self.transformers = transformers + self.comment_graph = CommentGraph(warnings) + self.warnings = warnings + self.lax = lax + if xhtml: + self.host_language = XHTML_RDFA + else: + self.host_language = GENERIC_XML + + def __str__(self): + retval = """Current options: + space_preserve : %s + warnings : %s + lax parsing : %s + host language : %s + """ + return retval % (self.space_preserve, self.warnings, self.lax, self.host_language) + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~ new file mode 100644 index 0000000..0329969 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/options.py~ @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +""" + +Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and +handling of the extra Graph for warnings, informations, errors. 
+ + +@summary: RDFa parser (distiller) +@requires: U{RDFLib<http://rdflib.net>} +@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing; note possible dependecies on Python's version on the project's web site +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} + +""" + +import sys +from rdflib.graph import Graph +from rdflib.term import BNode, Literal, URIRef +from rdflib.namespace import Namespace + +__all__ = ['CommentGraph', 'Options'] + +DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller" +DIST_NS = DIST_URI + '#' + +ns_errors = Namespace(DIST_NS) +distillerURI = URIRef(DIST_URI) + +WARNING = 'warning' +ERROR = 'error' +INFO = 'info' +DEBUG = 'debug' + +_message_properties = { + WARNING: ns_errors["warning"], + ERROR: ns_errors["error"], + INFO: ns_errors["information"], + DEBUG: ns_errors["debug"] +} + +def _add_to_comment_graph(graph, msg, prop, uri): + """ + Add a distiller message to the graph. + + @param graph: RDFLib Graph + @param msg: message of an exception + @type msg: RDFLIb Literal + @param prop: the property to be used + @type prop: string, must be one of 'warning', 'error', 'info', 'debug' + @param uri: the top URI used to invoke the distiller + @type uri: URIRef + """ + bnode = BNode() + graph.add((distillerURI, _message_properties[prop], bnode)) + graph.add((bnode, ns_errors["onURI"], uri)) + graph.add((bnode, ns_errors["message"], msg)) + + +class CommentGraph(object): + """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings, + error messages, and informational messages. 
+ """ + def __init__(self, warnings = False): + """ + @param warnings: whether a graph should effectively be set up, or whether this + should just be an empty shell for the various calls to work (without effect) + """ + if warnings: + self.graph = Graph() + else: + self.graph = None + self.accumulated_literals = [] + self.baseURI = None + + def _add_triple(self, msg, prop): + obj = Literal(msg) + if self.baseURI == None: + self.accumulated_literals.append((obj,prop)) + elif self.graph != None: + _add_to_comment_graph(self.graph, obj, prop, self.baseURI) + + def set_base_URI(self, URI): + """Set the base URI for the comment triples. + + Note that this method I{must} be called at some point to complete the triples. Without it the triples + added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph. + + @param URI: URIRef for the subject of the comments + """ + self.baseURI = URI + if self.graph != None: + for obj, prop in self.accumulated_literals: + _add_to_comment_graph(self.graph, obj, prop, self.baseURI) + self.accumulated_literals = [] + + def add_warning(self, txt): + """Add a warning. A comment triplet is added to the separate "warning" graph. + @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== " + """ + self._add_triple(txt, WARNING) + + def add_info(self, txt): + """Add an informational comment. A comment triplet is added to the separate "warning" graph. + @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== " + """ + self._add_triple(txt, INFO) + + def add_error(self, txt): + """Add an error comment. A comment triplet is added to the separate "warning" graph. + @param txt: the information text. 
It will be preceded by the string "==== pyRdfa information ==== " + """ + self._add_triple(txt, ERROR) + + def _add_debug(self, txt): + self._add_triple(txt, DEBUG) + + +GENERIC_XML = 0 +XHTML_RDFA = 1 +HTML5_RDFA = 2 + +class Options(object): + """Settable options. An instance of this class is stored in + the L{execution context<ExecutionContext>} of the parser. + + @ivar space_preserve: whether plain literals should preserve spaces at output or not + @type space_preserve: Boolean + @ivar comment_graph: Graph for the storage of warnings + @type comment_graph: L{CommentGraph} + @ivar warnings: whether warnings should be generated or not + @type warnings: Boolean + @ivar transformers: extra transformers + @type transformers: list + @type host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA + @ivar host_language: integer (logically: an enumeration) + @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time + @type lax: Boolean + """ + def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False): + """ + @param space_preserve: whether plain literals should preserve spaces at output or not + @type space_preserve: Boolean + @param warnings: whether warnings should be generated or not + @type warnings: Boolean + @param transformers: extra transformers + @type transformers: list + @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set ot HTML5_RDFA, depending on the value of the lax flag and the result of parsing. + @type xhtml: Booelan + @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. 
This means that the value of the host language might change run time + @type lax: Boolean + """ + self.space_preserve = space_preserve + self.transformers = transformers + self.comment_graph = CommentGraph(warnings) + self.warnings = warnings + self.lax = lax + if xhtml: + self.host_language = XHTML_RDFA + else: + self.host_language = GENERIC_XML + + def __str__(self): + retval = """Current options: + space_preserve : %s + warnings : %s + lax parsing : %s + host language : %s + """ + return retval % (self.space_preserve, self.warnings, self.lax, self.host_language) + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py new file mode 100644 index 0000000..b786f7f --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +""" +The core parsing function of RDFa. Some details are +put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state). 
+ +@summary: RDFa core parser processing step +@requires: U{RDFLib package<http://rdflib_.net>} +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +""" + +from rdflib_.term import BNode, URIRef +from rdflib_.namespace import RDF + +from rdflib_.plugins.parsers.rdfa.state import ExecutionContext +from rdflib_.plugins.parsers.rdfa.literal import generate_literal +from rdflib_.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF +from rdflib_.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA + +__all__ = ['parse_one_node'] + +def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples): + """The (recursive) step of handling a single node. See the + U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details. + + @param node: the DOM node to handle + @param graph: the RDF graph + @type graph: RDFLib's Graph object instance + @param parent_object: the parent's object, as an RDFLib URIRef + @param incoming_state: the inherited state (namespaces, lang, etc) + @type incoming_state: L{State.ExecutionContext} + @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not) + by the current node. + @return: whether the caller has to complete it's parent's incomplete triples + @rtype: Boolean + """ + def _get_resources_for_attr(attr): + """Get a series of resources encoded via CURIE-s for an attribute on a specific node. 
+ @param attr: the name of the attribute + @return: a list of RDFLib URIRef instances + """ + if not node.hasAttribute(attr): + return [] + else: + rel = (attr == "rel") or (attr == "rev") + prop = (attr == "property") + return state.get_resources(node.getAttribute(attr), rel, prop) + + # Update the state. This means, for example, the possible local settings of + # namespaces and lang + state = ExecutionContext(node, graph, inherited_state=incoming_state) + + #--------------------------------------------------------------------------------- + # Handle the special case for embedded RDF, eg, in SVG1.2. + # This may add some triples to the target graph that does not originate from RDFa parsing + # If the function return TRUE, that means that an rdf:RDF has been found. No + # RDFa parsing should be done on that subtree, so we simply return... + if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state): + return + + #--------------------------------------------------------------------------------- + # First, let us check whether there is anything to do at all. Ie, + # whether there is any relevant RDFa specific attribute on the element + # + if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"): + # nop, there is nothing to do here, just go down the tree and return... 
+ for n in node.childNodes: + if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples) + return + + + #----------------------------------------------------------------- + # The goal is to establish the subject and object for local processing + # The behaviour is slightly different depending on the presense or not + # of the @rel/@rev attributes + current_subject = None + current_object = None + + if _has_one_of_attributes(node, "rel", "rev"): + # in this case there is the notion of 'left' and 'right' of @rel/@rev + # in establishing the new Subject and the objectResource + + # set first the subject + if node.hasAttribute("about"): + current_subject = state.get_Curie_ref(node.getAttribute("about")) + elif node.hasAttribute("src"): + current_subject = state.get_URI_ref(node.getAttribute("src")) + elif node.hasAttribute("typeof"): + current_subject = BNode() + + # get_URI_ref may return None in case of an illegal Curie, so + # we have to be careful here, not use only an 'else' + if current_subject == None: + current_subject = parent_object + + # set the object resource + if node.hasAttribute("resource"): + current_object = state.get_Curie_ref(node.getAttribute("resource")) + elif node.hasAttribute("href"): + current_object = state.get_URI_ref(node.getAttribute("href")) + else: + # in this case all the various 'resource' setting attributes + # behave identically, except that their value might be different + # in terms of CURIE-s and they also have their own priority, of course + if node.hasAttribute("about"): + current_subject = state.get_Curie_ref(node.getAttribute("about")) + elif node.hasAttribute("src"): + current_subject = state.get_URI_ref(node.getAttribute("src")) + elif node.hasAttribute("resource"): + current_subject = state.get_Curie_ref(node.getAttribute("resource")) + elif node.hasAttribute("href"): + current_subject = state.get_URI_ref(node.getAttribute("href")) + elif node.hasAttribute("typeof"): + 
current_subject = BNode() + + # get_URI_ref may return None in case of an illegal Curie, so + # we have to be careful here, not use only an 'else' + if current_subject == None: + current_subject = parent_object + + # in this case no non-literal triples will be generated, so the + # only role of the current_objectResource is to be transferred to + # the children node + current_object = current_subject + + # --------------------------------------------------------------------- + # The possible typeof indicates a number of type statements on the newSubject + for defined_type in _get_resources_for_attr("typeof"): + graph.add((current_subject, RDF.type, defined_type)) + + # --------------------------------------------------------------------- + # In case of @rel/@rev, either triples or incomplete triples are generated + # the (possible) incomplete triples are collected, to be forwarded to the children + incomplete_triples = [] + for prop in _get_resources_for_attr("rel"): + theTriple = (current_subject, prop, current_object) + if current_object != None: + graph.add(theTriple) + else: + incomplete_triples.append(theTriple) + for prop in _get_resources_for_attr("rev"): + theTriple = (current_object, prop, current_subject) + if current_object != None: + graph.add(theTriple) + else: + incomplete_triples.append(theTriple) + + # ---------------------------------------------------------------------- + # Generation of the literal values. The newSubject is the subject + # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated, + # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later + # in the parsing process). + if node.hasAttribute("property"): + # Generate the literal. 
It has been put it into a separate module to make it more managable + # the overall return value should be set to true if any valid triple has been generated + recurse = generate_literal(node, graph, current_subject, state) + else: + recurse = True + + # ---------------------------------------------------------------------- + # Setting the current object to a bnode is setting up a possible resource + # for the incomplete triples downwards + if current_object == None: + object_to_children = BNode() + else: + object_to_children = current_object + + #----------------------------------------------------------------------- + # Here is the recursion step for all the children + if recurse: + for n in node.childNodes: + if n.nodeType == node.ELEMENT_NODE: + parse_one_node(n, graph, object_to_children, state, incomplete_triples) + + # --------------------------------------------------------------------- + # At this point, the parent's incomplete triples may be completed + for s, p, o in parent_incomplete_triples: + if s == None: s = current_subject + if o == None: o = current_subject + graph.add((s, p, o)) + + # ------------------------------------------------------------------- + # This should be it... + # ------------------------------------------------------------------- + return + + +def _has_one_of_attributes(node, *args): + """ + Check whether one of the listed attributes is present on a (DOM) node. + @param node: DOM element node + @param args: possible attribute names + @return: True or False + @rtype: Boolean + """ + return True in [ node.hasAttribute(attr) for attr in args ] + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~ new file mode 100644 index 0000000..d5b411f --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/parse.py~ @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +""" +The core parsing function of RDFa. 
from rdflib.term import BNode, URIRef
from rdflib.namespace import RDF

# NOTE(review): sibling modules in this vendored tree import from the renamed
# package "rdflib_" (see state.py), but these imports still target "rdflib".
# This looks like the vendoring rename missed this file -- confirm and align.
from rdflib.plugins.parsers.rdfa.state import ExecutionContext
from rdflib.plugins.parsers.rdfa.literal import generate_literal
from rdflib.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF
from rdflib.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA

__all__ = ['parse_one_node']

def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc)
    @type incoming_state: L{State.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing
    resource set to None) to be handled (or not) by the current node; any
    triple completed here is added to the graph before this call returns
    @return: None.  (An earlier docstring claimed a Boolean return, but no
    code path in the function returns a value; results are carried entirely
    through the graph and the incomplete-triples lists.)
    """
    def _get_resources_for_attr(attr):
        """Get a series of resources encoded via CURIE-s for an attribute on a
        specific node.
        @param attr: the name of the attribute
        @return: a list of RDFLib URIRef instances
        """
        if not node.hasAttribute(attr):
            return []
        else:
            # @rel/@rev and @property each admit additional predefined
            # (keyword) values; tell the state which interpretation applies
            rel = (attr == "rel") or (attr == "rev")
            prop = (attr == "property")
            return state.get_resources(node.getAttribute(attr), rel, prop)

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from
    # RDFa parsing.  If the function returns True, an rdf:RDF subtree has been
    # found (and fully processed there); no RDFa parsing should be done on that
    # subtree, so we simply return...
    if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
        return

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return


    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing.
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None

    if _has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.get_Curie_ref(node.getAttribute("about"))
        elif node.hasAttribute("src"):
            current_subject = state.get_URI_ref(node.getAttribute("src"))
        elif node.hasAttribute("typeof"):
            # @typeof with no explicit subject mints a fresh blank node
            current_subject = BNode()

        # get_URI_ref may return None in case of an illegal Curie, so
        # we have to be careful here, not use only an 'else'
        if current_subject == None:
            current_subject = parent_object

        # set the object resource
        if node.hasAttribute("resource"):
            current_object = state.get_Curie_ref(node.getAttribute("resource"))
        elif node.hasAttribute("href"):
            current_object = state.get_URI_ref(node.getAttribute("href"))
    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, except that their value might be different
        # in terms of CURIE-s and they also have their own priority, of course
        if node.hasAttribute("about"):
            current_subject = state.get_Curie_ref(node.getAttribute("about"))
        elif node.hasAttribute("src"):
            current_subject = state.get_URI_ref(node.getAttribute("src"))
        elif node.hasAttribute("resource"):
            current_subject = state.get_Curie_ref(node.getAttribute("resource"))
        elif node.hasAttribute("href"):
            current_subject = state.get_URI_ref(node.getAttribute("href"))
        elif node.hasAttribute("typeof"):
            current_subject = BNode()

        # get_URI_ref may return None in case of an illegal Curie, so
        # we have to be careful here, not use only an 'else'
        if current_subject == None:
            current_subject = parent_object

        # in this case no non-literal triples will be generated, so the
        # only role of the current_objectResource is to be transferred to
        # the children node
        current_object = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the newSubject
    for defined_type in _get_resources_for_attr("typeof"):
        graph.add((current_subject, RDF.type, defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated;
    # the (possible) incomplete triples (object still None at this point) are
    # collected, to be forwarded to the children
    incomplete_triples = []
    for prop in _get_resources_for_attr("rel"):
        theTriple = (current_subject, prop, current_object)
        if current_object != None:
            graph.add(theTriple)
        else:
            incomplete_triples.append(theTriple)
    for prop in _get_resources_for_attr("rev"):
        # @rev reverses subject and object relative to @rel
        theTriple = (current_object, prop, current_subject)
        if current_object != None:
            graph.add(theTriple)
        else:
            incomplete_triples.append(theTriple)

    # ----------------------------------------------------------------------
    # Generation of the literal values. The newSubject is the subject.
    # A particularity of @property is that it stops the parsing down the DOM
    # tree if an XML Literal is generated, because everything down there is
    # part of the generated literal. For this purpose the recurse flag is set
    # (and used later in the parsing process).
    if node.hasAttribute("property"):
        # Generate the literal. It has been put into a separate module to make
        # it more manageable
        recurse = generate_literal(node, graph, current_subject, state)
    else:
        recurse = True

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object == None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    if recurse:
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed:
    # this node's subject fills whichever position (s or o) was left as None
    for s, p, o in parent_incomplete_triples:
        if s == None: s = current_subject
        if o == None: o = current_subject
        graph.add((s, p, o))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return


def _has_one_of_attributes(node, *args):
    """
    Check whether at least one of the listed attributes is present on a (DOM) node.
    @param node: DOM element node
    @param args: possible attribute names
    @return: True or False
    @rtype: Boolean
    """
    return True in [ node.hasAttribute(attr) for attr in args ]
The state includes:

 - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances
 - language, retrieved from C{@xml:lang}
 - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed.
 - options, in the form of an L{Options<pyRdfa.Options>} instance

The execution context object is also used to turn relative URI-s and CURIEs into real URI references.

@summary: RDFa core parser processing step
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}

@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace
@var XHTML_URI: URI prefix of the XHTML vocabulary
@var RDFa_PROFILE: the official RDFa profile URI
@var RDFa_VERSION: the official version string of RDFa
@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIEs are not protected)
@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s.
@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.)
@var __bnodes: dictionary of blank node names to real blank nodes
@var __empty_bnode: I{the} BNode to be associated with the CURIE of the form "C{_:}".
+""" + +from rdflib_.namespace import Namespace, RDF, RDFS +from rdflib_.term import BNode, URIRef +from rdflib_.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA + +import re +import random +import urlparse + +__all__ = ['ExecutionContext'] + +RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab" +RDFa_VERSION = "XHTML+RDFa 1.0" +RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN" +RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd" + +usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"] + +####Predefined @rel/@rev/@property values +# predefined values for the @rel and @rev values. These are considered to be part of a specific +# namespace, defined by the RDFa document. +# At the moment, there are no predefined @property values, but the code is there in case +# some will be defined +XHTML_PREFIX = "xhv" +XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#" + +_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents', +'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev', +'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top'] + +_predefined_property = [] + +#### Managing blank nodes for CURIE-s +__bnodes = {} +__empty_bnode = BNode() +def _get_bnode_from_Curie(var): + """ + 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used + before, then the corresponding BNode is returned; otherwise a new BNode is created and + associated to that value. + @param var: CURIE BNode identifier + @return: BNode + """ + if len(var) == 0: + return __empty_bnode + if var in __bnodes: + return __bnodes[var] + else: + retval = BNode() + __bnodes[var] = retval + return retval + +#### Quote URI-s +import urllib +# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other +# special characters are converted to their %.. 
# Characters that may stay unescaped when %-quoting a URI (see _quote below).
# Raw string: the backslash is intentional and kept literal -- this is the same
# value as the original ':/\?=#' literal, without the invalid '\?' escape.
_unquotedChars = r':/\?=#'
# Characters that are legal in a URI reference but almost always indicate a typo.
_warnChars = [' ', '\n', '\r', '\t']
def _quote(uri, options):
    """
    'quote' a URI, ie, exchange special characters for their '%..' equivalents.
    Some of the characters may stay as they are (listed in L{_unquotedChars}).
    If one of the characters listed in L{_warnChars} is also in the uri, an
    extra warning is also generated.
    @param uri: URI
    @param options: invocation options; may be None, in which case no warning is emitted
    @type options: L{Options<pyRdfa.Options>}
    @return: the %-quoted URI string
    """
    suri = uri.strip()
    for c in _warnChars:
        if suri.find(c) != -1:
            if options is not None:
                options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri)
            break
    return urllib.quote(suri, _unquotedChars)


#### Core Class definition
class ExecutionContext(object):
    """State at a specific node, including the current set of namespaces in the
    RDFLib sense, the current language, and the base. The class is also used to
    interpret URI-s and CURIE-s to produce URI references for RDFLib.

    @ivar options: reference to the overall options
    @type options: L{Options.Options}
    @ivar base: the 'base' URI
    @ivar defaultNS: default namespace
    @ivar lang: language tag (possibly None)
    @ivar ns: dictionary of namespaces
    @type ns: dictionary, each value is an RDFLib Namespace object
    """
    def __init__(self, node, graph, inherited_state=None, base="", options=None):
        """
        @param node: the current DOM Node
        @param graph: the RDFLib Graph
        @keyword inherited_state: the state as inherited from upper layers. This
        inherited_state is mixed with the state information retrieved from the
        current node.
        @type inherited_state: L{State.ExecutionContext}
        @keyword base: string denoting the base URI for the specific node. This
        overrides the possible base inherited from the upper layers. The current
        XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2
        does, so this is necessary for SVG (and other possible XML dialects that
        accept C{@xml:base})
        @keyword options: invocation option
        @type options: L{Options<pyRdfa.Options>}
        """
        #-----------------------------------------------------------------
        # settling the base
        # note that, strictly speaking, it is not necessary to add the base to
        # the context, because there is only one place to set it (<base> element
        # of the <header>). It is done because it is prepared for a possible
        # future change in direction of accepting xml:base on each element.
        # At the moment, it is invoked with a 'None' at the top level of
        # parsing, that is when the <base> element is looked for.
        if inherited_state:
            self.base = inherited_state.base
            self.options = inherited_state.options
            # for generic XML versions the xml:base attribute should be handled
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")
        else:
            # this is the branch called from the very top
            self.base = ""
            for bases in node.getElementsByTagName("base"):
                if bases.hasAttribute("href"):
                    # the last <base href="..."> in document order wins
                    self.base = bases.getAttribute("href")
                    continue
            if self.base == "":
                self.base = base

            # this is just to play safe; this branch should actually not happen
            if options is None:
                # BUG FIX: the original did "from pyRdfa import Options" here,
                # which would raise ImportError in this vendored tree; the
                # module-level import (rdflib_.plugins.parsers.rdfa.options)
                # already provides Options.
                self.options = Options()
            else:
                self.options = options

            # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")

            self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options)))

            # check the presence of the @profile and/or @version attribute for
            # the RDFa profile... This whole branch is, however, irrelevant if
            # the host language is a generic XML one (eg, SVG)
            if self.options.host_language != GENERIC_XML:
                doctype = None
                try:
                    # I am not 100% sure the HTML5 minidom implementation has
                    # this, so let us just be cautious here...
                    doctype = node.ownerDocument.doctype
                except AttributeError:
                    # narrowed from a bare "except:": only the attribute access
                    # above is being guarded
                    pass
                if doctype is None or not (doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID):
                    # next level: check the version
                    html = node.ownerDocument.documentElement
                    if not (html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version")):
                        # see if at least the profile has been set;
                        # find the <head> element
                        head = None
                        for index in range(0, html.childNodes.length - 1):
                            if html.childNodes.item(index).nodeName == "head":
                                head = html.childNodes.item(index)
                                break
                        if not (head is not None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split()):
                            if self.options.host_language == HTML5_RDFA:
                                self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...")
                            else:
                                self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.")

        #-----------------------------------------------------------------
        # Stripping the fragment ID from the base URI, as demanded by RFC 3986
        self.base = urlparse.urldefrag(self.base)[0]

        #-----------------------------------------------------------------
        # Settling the language tags
        # check first the lang or xml:lang attribute
        # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang;
        # I just want to be prepared here...
        if options is not None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"):
            self.lang = node.getAttribute("lang")
            if len(self.lang) == 0: self.lang = None
        elif node.hasAttribute("xml:lang"):
            self.lang = node.getAttribute("xml:lang")
            if len(self.lang) == 0: self.lang = None
        elif inherited_state:
            self.lang = inherited_state.lang
        else:
            self.lang = None

        #-----------------------------------------------------------------
        # Handling namespaces
        # First get the local xmlns declarations/namespaces stuff.
        # (renamed from "dict", which shadowed the builtin)
        declared = {}
        for i in range(0, node.attributes.length):
            attr = node.attributes.item(i)
            if attr.name.find('xmlns:') == 0:
                # yep, there is a namespace setting
                key = attr.localName
                if key != "":  # exclude the top level xmlns setting...
                    if key == "_":
                        # BUG FIX: the original guarded these two add_error
                        # calls with "if warning:", but no such name exists in
                        # this scope (NameError); the errors are now reported
                        # unconditionally.
                        self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed")
                    elif key.find(':') != -1:
                        self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix")
                    else:
                        # quote the URI, ie, convert special characters into
                        # %.. This is true, for example, for spaces
                        uri = _quote(attr.value, self.options)
                        # 1. create a new Namespace entry
                        ns = Namespace(uri)
                        # 2. 'bind' it in the current graph to get a nicer output
                        graph.bind(key, uri)
                        # 3. Add an entry to the dictionary
                        declared[key] = ns

        # See if anything has been collected at all.
        # If not, the namespaces of the incoming state are taken over
        self.ns = {}
        if len(declared) == 0 and inherited_state:
            self.ns = inherited_state.ns
        else:
            if inherited_state:
                for k in inherited_state.ns: self.ns[k] = inherited_state.ns[k]
                # copying the newly found namespaces, possibly overwriting
                # incoming values
                for k in declared: self.ns[k] = declared[k]
            else:
                self.ns = declared

        # see if the xhtml core vocabulary has been set
        self.xhtml_prefix = None
        for key in self.ns.keys():
            if XHTML_URI == str(self.ns[key]):
                self.xhtml_prefix = key
                break
        if self.xhtml_prefix is None:
            if XHTML_PREFIX not in self.ns:
                self.ns[XHTML_PREFIX] = Namespace(XHTML_URI)
                self.xhtml_prefix = XHTML_PREFIX
            else:
                # the most disagreeable thing: the user has used the prefix
                # for something else, so mint a fresh one
                self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000))
                self.ns[self.xhtml_prefix] = Namespace(XHTML_URI)
            graph.bind(self.xhtml_prefix, XHTML_URI)

        # extra tricks for unusual usages...
        # if the 'rdf'/'rdfs' prefixes are not used, they are artificially added
        if "rdf" not in self.ns:
            self.ns["rdf"] = RDF
        if "rdfs" not in self.ns:
            self.ns["rdfs"] = RDFS

        # Final touch: setting the default namespace...
        if node.hasAttribute("xmlns"):
            self.defaultNS = node.getAttribute("xmlns")
        elif inherited_state and inherited_state.defaultNS is not None:
            self.defaultNS = inherited_state.defaultNS
        else:
            self.defaultNS = None

    def _get_predefined_rels(self, val, warning):
        """Get the predefined URI value for the C{@rel/@rev} attribute.
        @param val: attribute value
        @param warning: whether a warning should be generated or not
        @type warning: boolean
        @return: URIRef for the predefined URI (or None)
        """
        vv = val.strip().lower()
        if vv in _predefined_rel:
            return self.ns[self.xhtml_prefix][vv]
        else:
            if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val)
            return None

    def _get_predefined_properties(self, val, warning):
        """Get the predefined URI value for the C{@property} attribute.
        @param val: attribute value
        @param warning: whether a warning should be generated or not
        @type warning: boolean
        @return: URIRef for the predefined URI (or None)
        """
        vv = val.strip().lower()
        if vv in _predefined_property:
            return self.ns[self.xhtml_prefix][vv]
        else:
            if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val)
            return None

    def get_resource(self, val, rel=False, prop=False, warning=True):
        """Get a resource for a CURIE.
        The input argument is a CURIE; this is interpreted via the current
        namespaces and the corresponding URI Reference is returned.
        @param val: string of the form "prefix:lname"
        @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
        @keyword prop: whether the predefined C{@property} values should also be interpreted
        @keyword warning: whether a warning should be generated for an unknown predefined value
        @return: an RDFLib URIRef instance (or None)
        """
        if val == "":
            return None
        elif val.find(":") != -1:
            key = val.split(":", 1)[0]
            lname = val.split(":", 1)[1]
            if key == "_":
                # A possible error: this method is invoked for property URI-s,
                # which should not refer to a blank node. This case is checked
                # and a possible error condition is handled
                self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname)
                return None
            if key == "":
                # This is the ":blabla" case
                key = self.xhtml_prefix
        else:
            # if the resources correspond to a @rel or @rev or @property, then
            # there may be one more possibility here, namely that it is one of
            # the predefined values
            if rel:
                return self._get_predefined_rels(val, warning)
            elif prop:
                return self._get_predefined_properties(val, warning)
            else:
                self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val)
                return None

        if key not in self.ns:
            self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key)
            return None
        else:
            if lname == "":
                return URIRef(str(self.ns[key]))
            else:
                return self.ns[key][lname]

    def get_resources(self, val, rel=False, prop=False):
        """Get a series of resources encoded in CURIE-s.
        The input argument is a list of CURIE-s; these are interpreted via the
        current namespaces and the corresponding URI References are returned.
        @param val: strings of the form prefix':'lname, separated by space
        @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
        @keyword prop: whether the predefined C{@property} values should also be interpreted
        @return: a list of RDFLib URIRef instances (possibly empty)
        """
        # NOTE: the original called val.strip() and discarded the result (a
        # no-op), and filtered split() items against None (split() never yields
        # None); both removed -- split() already ignores surrounding whitespace.
        resources = [self.get_resource(v, rel, prop) for v in val.split()]
        return [r for r in resources if r is not None]

    def get_URI_ref(self, val):
        """Create a URI RDFLib resource for a URI.
        The input argument is a URI. It is checked whether it is a local
        reference with a '#' or not. If yes, a URIRef combined with the stored
        base value is returned. In both cases a URIRef for a full URI is
        created and returned.
        @param val: URI string
        @return: an RDFLib URIRef instance (or None for an illegal bracketed value)
        """
        if val == "":
            return URIRef(self.base)
        elif val[0] == '[' and val[-1] == ']':
            self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val)
            return None
        else:
            return URIRef(urlparse.urljoin(self.base, val))

    def get_Curie_ref(self, val):
        """Create a URI RDFLib resource for a CURIE.
        The input argument is a CURIE. This means that it is:
         - either of the form [a:b] where a:b should be resolved as an
           'unprotected' CURIE, or
         - a traditional URI (relative or absolute)

        In the second case the URI scheme is also compared to 'usual' URI
        protocols ('http', 'https', 'ftp', etc; see L{usual_protocols}). If
        there is no match, a warning is generated (indeed, a frequent mistake
        in authoring RDFa is to forget the '[' and ']' characters to "protect"
        CURIE-s).

        @param val: CURIE string
        @return: an RDFLib URIRef or BNode instance (or None for an illegal CURIE)
        """
        if len(val) == 0:
            return URIRef(self.base)
        elif val[0] == "[":
            if val[-1] == "]":
                curie = val[1:-1]
                # A possible blank node reference must be separated out here:
                if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":":
                    return _get_bnode_from_Curie(curie[2:])
                else:
                    return self.get_resource(val[1:-1])
            else:
                # illegal CURIE...
                self.options.comment_graph.add_error("Illegal CURIE: %s" % val)
                return None
        else:
            # check the value, to see if an error may have been made...
            # (removed an unused local that lower-cased the value but was
            # never read)
            protocol = urlparse.urlparse(val)[0]
            if protocol != "" and protocol not in usual_protocols:
                err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val
                self.options.comment_graph.add_warning(err)
            return self.get_URI_ref(val)
+ +@summary: RDFa core parser processing step +@requires: U{RDFLib package<http://rdflib.net>} +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} + +@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace +@var XHTML_URI: URI prefix of the XHTML vocabulary +@var RDFa_PROFILE: the official RDFa profile URI +@var RDFa_VERSION: the official version string of RDFa +@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected) +@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s. +@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.) +@var __bnodes: dictionary of blank node names to real blank node +@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}". +""" + +from rdflib.namespace import Namespace, RDF, RDFS +from rdflib.term import BNode, URIRef +from rdflib.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA + +import re +import random +import urlparse + +__all__ = ['ExecutionContext'] + +RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab" +RDFa_VERSION = "XHTML+RDFa 1.0" +RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN" +RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd" + +usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"] + +####Predefined @rel/@rev/@property values +# predefined values for the @rel and @rev values. 
These are considered to be part of a specific +# namespace, defined by the RDFa document. +# At the moment, there are no predefined @property values, but the code is there in case +# some will be defined +XHTML_PREFIX = "xhv" +XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#" + +_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents', +'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev', +'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top'] + +_predefined_property = [] + +#### Managing blank nodes for CURIE-s +__bnodes = {} +__empty_bnode = BNode() +def _get_bnode_from_Curie(var): + """ + 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used + before, then the corresponding BNode is returned; otherwise a new BNode is created and + associated to that value. + @param var: CURIE BNode identifier + @return: BNode + """ + if len(var) == 0: + return __empty_bnode + if var in __bnodes: + return __bnodes[var] + else: + retval = BNode() + __bnodes[var] = retval + return retval + +#### Quote URI-s +import urllib +# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other +# special characters are converted to their %.. equivalents for namespace prefixes +_unquotedChars = ':/\?=#' +_warnChars = [' ', '\n', '\r', '\t'] +def _quote(uri, options): + """ + 'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters + may stay as they are (listed in L{_unquotedChars}. If one of the characters listed in L{_warnChars} + is also in the uri, an extra warning is also generated. + @param uri: URI + @param options: + @type options: L{Options<pyRdfa.Options>} + """ + suri = uri.strip() + for c in _warnChars: + if suri.find(c) != -1: + if options != None: + options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' 
% suri) + break + return urllib.quote(suri, _unquotedChars) + + +#### Core Class definition +class ExecutionContext(object): + """State at a specific node, including the current set + of namespaces in the RDFLib sense, the + current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce + URI references for RDFLib. + + @ivar options: reference to the overall options + @type ivar: L{Options.Options} + @ivar base: the 'base' URI + @ivar defaultNS: default namespace + @ivar lang: language tag (possibly None) + @ivar ns: dictionary of namespaces + @type ns: dictionary, each value is an RDFLib Namespace object + + """ + def __init__(self, node, graph, inherited_state=None, base="", options=None): + """ + @param node: the current DOM Node + @param graph: the RDFLib Graph + @keyword inherited_state: the state as inherited + from upper layers. This inherited_state is mixed with the state information + retrieved from the current node. + @type inherited_state: L{State.ExecutionContext} + @keyword base: string denoting the base URI for the specific node. This overrides the possible + base inherited from the upper layers. The + current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is + necessary for SVG (and other possible XML dialects that accept C{@xml:base}) + @keyword options: invocation option + @type options: L{Options<pyRdfa.Options>} + """ + #----------------------------------------------------------------- + # settling the base + # note that, strictly speaking, it is not necessary to add the base to the + # context, because there is only one place to set it (<base> element of the <header>). + # It is done because it is prepared for a possible future change in direction of + # accepting xml:base on each element. + # At the moment, it is invoked with a 'None' at the top level of parsing, that is + # when the <base> element is looked for. 
+ if inherited_state: + self.base = inherited_state.base + self.options = inherited_state.options + # for generic XML versions the xml:base attribute should be handled + if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): + self.base = node.getAttribute("xml:base") + else: + # this is the branch called from the very top + self.base = "" + for bases in node.getElementsByTagName("base"): + if bases.hasAttribute("href"): + self.base = bases.getAttribute("href") + continue + if self.base == "": + self.base = base + + # this is just to play safe. I believe this branch should actually not happen... + if options == None: + from pyRdfa import Options + self.options = Options() + else: + self.options = options + + # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2 + if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): + self.base = node.getAttribute("xml:base") + + self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options))) + + # check the the presense of the @profile and or @version attribute for the RDFa profile... + # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG) + if self.options.host_language != GENERIC_XML: + doctype = None + try: + # I am not 100% sure the HTML5 minidom implementation has this, so let us just be + # cautious here... 
+ doctype = node.ownerDocument.doctype + except: + pass + if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ): + # next level: check the version + html = node.ownerDocument.documentElement + if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ): + # see if least the profile has been set + # Find the <head> element + head = None + for index in range(0, html.childNodes.length-1): + if html.childNodes.item(index).nodeName == "head": + head = html.childNodes.item(index) + break + if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ): + if self.options.host_language == HTML5_RDFA: + self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...") + else: + self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.") + + #----------------------------------------------------------------- + # Stripping the fragment ID from the base URI, as demanded by RFC 3986 + self.base = urlparse.urldefrag(self.base)[0] + + #----------------------------------------------------------------- + # Settling the language tags + # check first the lang or xml:lang attribute + # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang; + # I just want to be prepared here... 
+ if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"): + self.lang = node.getAttribute("lang") + if len(self.lang) == 0 : self.lang = None + elif node.hasAttribute("xml:lang"): + self.lang = node.getAttribute("xml:lang") + if len(self.lang) == 0 : self.lang = None + elif inherited_state: + self.lang = inherited_state.lang + else: + self.lang = None + + #----------------------------------------------------------------- + # Handling namespaces + # First get the local xmlns declarations/namespaces stuff. + dict = {} + for i in range(0, node.attributes.length): + attr = node.attributes.item(i) + if attr.name.find('xmlns:') == 0 : + # yep, there is a namespace setting + key = attr.localName + if key != "" : # exclude the top level xmlns setting... + if key == "_": + if warning: self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" ) + elif key.find(':') != -1: + if warning: self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" ) + else : + # quote the URI, ie, convert special characters into %.. This is + # true, for example, for spaces + uri = _quote(attr.value, self.options) + # 1. create a new Namespace entry + ns = Namespace(uri) + # 2. 'bind' it in the current graph to + # get a nicer output + graph.bind(key, uri) + # 3. Add an entry to the dictionary + dict[key] = ns + + # See if anything has been collected at all. 
+ # If not, the namespaces of the incoming state is + # taken over + self.ns = {} + if len(dict) == 0 and inherited_state: + self.ns = inherited_state.ns + else: + if inherited_state: + for k in inherited_state.ns : self.ns[k] = inherited_state.ns[k] + # copying the newly found namespace, possibly overwriting + # incoming values + for k in dict : self.ns[k] = dict[k] + else: + self.ns = dict + + # see if the xhtml core vocabulary has been set + self.xhtml_prefix = None + for key in self.ns.keys(): + if XHTML_URI == str(self.ns[key]): + self.xhtml_prefix = key + break + if self.xhtml_prefix == None: + if XHTML_PREFIX not in self.ns: + self.ns[XHTML_PREFIX] = Namespace(XHTML_URI) + self.xhtml_prefix = XHTML_PREFIX + else: + # the most disagreeable thing, the user has used + # the prefix for something else... + self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000)) + self.ns[self.xhtml_prefix] = Namespace(XHTML_URI) + graph.bind(self.xhtml_prefix, XHTML_URI) + + # extra tricks for unusual usages... + # if the 'rdf' prefix is not used, it is artificially added... + if "rdf" not in self.ns: + self.ns["rdf"] = RDF + if "rdfs" not in self.ns: + self.ns["rdfs"] = RDFS + + # Final touch: setting the default namespace... + if node.hasAttribute("xmlns"): + self.defaultNS = node.getAttribute("xmlns") + elif inherited_state and inherited_state.defaultNS != None: + self.defaultNS = inherited_state.defaultNS + else: + self.defaultNS = None + + def _get_predefined_rels(self, val, warning): + """Get the predefined URI value for the C{@rel/@rev} attribute. 
+ @param val: attribute name + @param warning: whether a warning should be generated or not + @type warning: boolean + @return: URIRef for the predefined URI (or None) + """ + vv = val.strip().lower() + if vv in _predefined_rel: + return self.ns[self.xhtml_prefix][vv] + else: + if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val) + return None + + def _get_predefined_properties(self, val, warning): + """Get the predefined value for the C{@property} attribute. + @param val: attribute name + @param warning: whether a warning should be generated or not + @type warning: boolean + @return: URIRef for the predefined URI (or None) + """ + vv = val.strip().lower() + if vv in _predefined_property: + return self.ns[self.xhtml_prefix][vv] + else: + if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val) + return None + + def get_resource(self, val, rel=False, prop=False, warning=True): + """Get a resource for a CURIE. + The input argument is a CURIE; this is interpreted + via the current namespaces and the corresponding URI Reference is returned + @param val: string of the form "prefix:lname" + @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted + @keyword prop: whether the predefined C{@property} values should also be interpreted + @return: an RDFLib URIRef instance (or None) + """ + if val == "": + return None + elif val.find(":") != -1: + key = val.split(":", 1)[0] + lname = val.split(":", 1)[1] + if key == "_": + # A possible error: this method is invoked for property URI-s, which + # should not refer to a blank node. 
This case is checked and a possible + # error condition is handled + self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname) + return None + if key == "": + # This is the ":blabla" case + key = self.xhtml_prefix + else: + # if the resources correspond to a @rel or @rev or @property, then there + # may be one more possibility here, namely that it is one of the + # predefined values + if rel: + return self._get_predefined_rels(val, warning) + elif prop: + return self._get_predefined_properties(val, warning) + else: + self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val) + return None + + if key not in self.ns: + self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key) + return None + else: + if lname == "": + return URIRef(str(self.ns[key])) + else: + return self.ns[key][lname] + + def get_resources(self, val, rel=False, prop=False): + """Get a series of resources encoded in CURIE-s. + The input argument is a list of CURIE-s; these are interpreted + via the current namespaces and the corresponding URI References are returned. + @param val: strings of the form prefix':'lname, separated by space + @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted + @keyword prop: whether the predefined C{@property} values should also be interpreted + @return: a list of RDFLib URIRef instances (possibly empty) + """ + val.strip() + resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ] + return [ r for r in resources if r != None ] + + def get_URI_ref(self, val): + """Create a URI RDFLib resource for a URI. + The input argument is a URI. It is checked whether it is a local + reference with a '#' or not. If yes, a URIRef combined with the + stored base value is returned. 
In both cases a URIRef for a full URI is created + and returned + @param val: URI string + @return: an RDFLib URIRef instance + """ + if val == "": + return URIRef(self.base) + elif val[0] == '[' and val[-1] == ']': + self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val) + return None + else: + return URIRef(urlparse.urljoin(self.base, val)) + + def get_Curie_ref(self, val): + """Create a URI RDFLib resource for a CURIE. + The input argument is a CURIE. This means that it is: + - either of the form [a:b] where a:b should be resolved as an + 'unprotected' CURIE, or + - it is a traditional URI (relative or absolute) + + If the second case the URI value is also compared to 'usual' URI + protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}). + If there is no match, a warning is generated (indeed, a frequent + mistake in authoring RDFa is to forget the '[' and ']' characters to + "protect" CURIE-s.) + + @param val: CURIE string + @return: an RDFLib URIRef instance + """ + if len(val) == 0: + return URIRef(self.base) + elif val[0] == "[": + if val[-1] == "]": + curie = val[1:-1] + # A possible Blank node reference should be separated here: + if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":": + return _get_bnode_from_Curie(curie[2:]) + else: + return self.get_resource(val[1:-1]) + else: + # illegal CURIE... + self.options.comment_graph.add_error("Illegal CURIE: %s" % val) + return None + else: + # check the value, to see if an error may have been made... 
+ # Usual protocol values in the URI + v = val.strip().lower() + protocol = urlparse.urlparse(val)[0] + if protocol != "" and protocol not in usual_protocols: + err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val + self.options.comment_graph.add_warning(err) + return self.get_URI_ref(val) + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/__init__.py diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py new file mode 100644 index 0000000..feff6ff --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +""" +Simple transfomer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there). +Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax. 
+
+@summary: Add a top "about" to <head> and <body>
+@requires: U{RDFLib package<http://rdflib_.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
+@contact: Ivan Herman, ivan@w3.org
+"""
+
+def head_about_transform(html, options):
+    """
+    Add C{@about=""} to the C{<head>} and C{<body>} elements of the document,
+    unless those elements already carry an C{@about} attribute. The DOM is
+    modified in place; nothing is returned.
+
+    @param html: a DOM node for the top level html element
+    @param options: invocation options (not used by this transform, but part
+    of the common transformer call signature)
+    @type options: L{Options<pyRdfa.Options>}
+    """
+    # Only supply a default; an author-provided @about is never overwritten.
+    for top in html.getElementsByTagName("head"):
+        if not top.hasAttribute("about"):
+            top.setAttribute("about", "")
+    for top in html.getElementsByTagName("body"):
+        if not top.hasAttribute("about"):
+            top.setAttribute("about", "")
+
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~
new file mode 100644
index 0000000..0cf8f7a
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/transform/headabout.py~
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Simple transformer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there).
+Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax. 
+ +@summary: Add a top "about" to <head> and <body> +@requires: U{RDFLib package<http://rdflib.net>} +@organization: U{World Wide Web Consortium<http://www.w3.org>} +@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} +@license: This software is available for use under the +U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} +@contact: Ivan Herman, ivan@w3.org +""" + +def head_about_transform(html, options): + """ + @param html: a DOM node for the top level html element + @param options: invocation options + @type options: L{Options<pyRdfa.Options>} + """ + for top in html.getElementsByTagName("head"): + if not top.hasAttribute("about"): + top.setAttribute("about", "") + for top in html.getElementsByTagName("body"): + if not top.hasAttribute("about"): + top.setAttribute("about", "") + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py new file mode 100644 index 0000000..a9d00ff --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py @@ -0,0 +1,579 @@ +# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of Daniel Krech nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +""" +from xml.sax import make_parser +from xml.sax.handler import ErrorHandler +from xml.sax.saxutils import handler, quoteattr, escape +from urlparse import urljoin, urldefrag + +from rdflib_.namespace import RDF, is_ncname +from rdflib_.term import URIRef +from rdflib_.term import BNode +from rdflib_.term import Literal +from rdflib_.exceptions import ParserError, Error +from rdflib_.parser import Parser + +__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser'] + +RDFNS = RDF + +# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI +# A mapping from unqualified terms to there qualified version. 
+UNQUALIFIED = {"about" : RDF.about,
+               "ID" : RDF.ID,
+               "type" : RDF.type,
+               "resource": RDF.resource,
+               "parseType": RDF.parseType}
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
+CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
+SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
+
+# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
+# Terms from older RDF/XML drafts; they are only used to reject documents
+# that still rely on them.
+OLD_TERMS = [
+    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
+    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
+    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
+
+# URIs that must not appear as node elements / node-element attributes.
+NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
+NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
+
+# URIs that must not appear as property elements / property attributes.
+PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
+PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
+PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
+
+# Expanded (namespace, localname) tuples for the two xml:* attributes the
+# SAX handler cares about.
+XMLNS = "http://www.w3.org/XML/1998/namespace"
+BASE = (XMLNS, "base")
+LANG = (XMLNS, "lang")
+
+
+class BagID(URIRef):
+    # A URIRef that also counts rdf:li container membership properties
+    # (rdf:_1, rdf:_2, ...) via next_li().
+    __slots__ = ['li']
+    def __init__(self, val):
+        # NOTE(review): the first argument to super() is normally the class
+        # itself (BagID), not its base; `super(URIRef, self)` skips URIRef in
+        # the MRO. Presumably `super(BagID, self)` was intended — confirm.
+        super(URIRef, self).__init__(val)
+        self.li = 0
+
+    def next_li(self):
+        # Return the next container-membership property, rdf:_<n>.
+        self.li += 1
+        return RDFNS[self.li]
+
+
+class ElementHandler(object):
+    # Per-element parse state kept on RDFXMLHandler's stack: start/char/end
+    # are the callbacks active for this element, the rest is accumulated data.
+    __slots__ = ['start', 'char', 'end', 'li', 'id',
+        'base', 'subject', 'predicate', 'object',
+        'list', 'language', 'datatype', 'declared', 'data']
+    def __init__(self):
+        # NOTE(review): the 'predicate' slot is declared but never
+        # initialized here; it is presumably always assigned before being
+        # read — confirm against the handler code.
+        self.start = None
+        self.char = None
+        self.end = None
+        self.li = 0
+        self.id = None
+        self.base = None
+        self.subject = None
+        self.object = None
+        self.list = None
+        self.language = None
+        self.datatype = None
+        self.declared = None
+        self.data = None
+
+    def next_li(self):
+        # Return the next container-membership property, rdf:_<n>.
+        self.li += 1
+        return RDFNS[self.li]
+
+
+class RDFXMLHandler(handler.ContentHandler):
+
+    def __init__(self, store):
+        self.store = store
+        self.preserve_bnode_ids = 
False + self.reset() + + def reset(self): + document_element = ElementHandler() + document_element.start = self.document_element_start + document_element.end = lambda name, qname: None + self.stack = [None, document_element,] + self.ids = {} # remember IDs we have already seen + self.bnode = {} + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self.locator = locator + + def startDocument(self): + pass + + def startPrefixMapping(self, prefix, namespace): + self._ns_contexts.append(self._current_context.copy()) + self._current_context[namespace] = prefix + self.store.bind(prefix, URIRef(namespace), override=False) + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElementNS(self, name, qname, attrs): + stack = self.stack + stack.append(ElementHandler()) + current = self.current + parent = self.parent + base = attrs.get(BASE, None) + if base is not None: + base, frag = urldefrag(base) + if parent and parent.base: + base = urljoin(parent.base, base) + else: + systemId = self.locator.getPublicId() or self.locator.getSystemId() + if systemId: + base = urljoin(systemId, base) + else: + if parent: + base = parent.base + if base is None: + systemId = self.locator.getPublicId() or self.locator.getSystemId() + if systemId: + base, frag = urldefrag(systemId) + current.base = base + language = attrs.get(LANG, None) + if language is None: + if parent: + language = parent.language + current.language = language + current.start(name, qname, attrs) + + def endElementNS(self, name, qname): + self.current.end(name, qname) + self.stack.pop() + + def characters(self, content): + char = self.current.char + if char: + char(content) + + def ignorableWhitespace(self, content): + pass + + def processingInstruction(self, target, data): + pass + + def add_reified(self, sid, (s, p, o)): + 
self.store.add((sid, RDF.type, RDF.Statement)) + self.store.add((sid, RDF.subject, s)) + self.store.add((sid, RDF.predicate, p)) + self.store.add((sid, RDF.object, o)) + + def error(self, message): + locator = self.locator + info = "%s:%s:%s: " % (locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()) + raise ParserError(info + message) + + def get_current(self): + return self.stack[-2] + # Create a read only property called current so that self.current + # give the current element handler. + current = property(get_current) + + def get_next(self): + return self.stack[-1] + # Create a read only property that gives the element handler to be + # used for the next element. + next = property(get_next) + + def get_parent(self): + return self.stack[-3] + # Create a read only property that gives the current parent + # element handler + parent = property(get_parent) + + def absolutize(self, uri): + result = urljoin(self.current.base, uri, allow_fragments=1) + if uri and uri[-1]=="#" and result[-1]!="#": + result = "%s#" % result + return URIRef(result) + + def convert(self, name, qname, attrs): + if name[0] is None: + name = URIRef(name[1]) + else: + name = URIRef("".join(name)) + atts = {} + for (n, v) in attrs.items(): #attrs._attrs.iteritems(): # + if n[0] is None: + att = URIRef(n[1]) + else: + att = URIRef("".join(n)) + if att.startswith(XMLNS) or att[0:3].lower()=="xml": + pass + elif att in UNQUALIFIED: + #if not RDFNS[att] in atts: + atts[RDFNS[att]] = v + else: + atts[URIRef(att)] = v + return name, atts + + def document_element_start(self, name, qname, attrs): + if name[0] and URIRef("".join(name)) == RDF.RDF: + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + next.start = self.node_element_start + next.end = self.node_element_end + else: + self.node_element_start(name, qname, attrs) + #self.current.end = self.node_element_end + # TODO... 
set end to something that sets start such that + # another element will cause error + + + def node_element_start(self, name, qname, attrs): + name, atts = self.convert(name, qname, attrs) + current = self.current + absolutize = self.absolutize + + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + next.start = self.property_element_start + next.end = self.property_element_end + + if name in NODE_ELEMENT_EXCEPTIONS: + self.error("Invalid node element URI: %s" % name) + + if RDF.ID in atts: + if RDF.about in atts or RDF.nodeID in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + + id = atts[RDF.ID] + if not is_ncname(id): + self.error("rdf:ID value is not a valid NCName: %s" % id) + subject = absolutize("#%s" % id) + if subject in self.ids: + self.error("two elements cannot use the same ID: '%s'" % subject) + self.ids[subject] = 1 # IDs can only appear once within a document + elif RDF.nodeID in atts: + if RDF.ID in atts or RDF.about in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + nodeID = atts[RDF.nodeID] + if not is_ncname(nodeID): + self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) + if self.preserve_bnode_ids is False: + if nodeID in self.bnode: + subject = self.bnode[nodeID] + else: + subject = BNode() + self.bnode[nodeID] = subject + else: + subject = BNode(nodeID) + elif RDF.about in atts: + if RDF.ID in atts or RDF.nodeID in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + subject = absolutize(atts[RDF.about]) + else: + subject = BNode() + + if name!=RDF.Description: # S1 + self.store.add((subject, RDF.type, absolutize(name))) + + language = current.language + for att in atts: + if not att.startswith(str(RDFNS)): + predicate = absolutize(att) + try: + object = Literal(atts[att], language) + except Error, e: + self.error(e.msg) + elif att==RDF.type: #S2 + predicate = RDF.type + object = 
absolutize(atts[RDF.type]) + elif att in NODE_ELEMENT_ATTRIBUTES: + continue + elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3 + self.error("Invalid property attribute URI: %s" % att) + continue # for when error does not throw an exception + else: + predicate = absolutize(att) + try: + object = Literal(atts[att], language) + except Error, e: + self.error(e.msg) + self.store.add((subject, predicate, object)) + + current.subject = subject + + + def node_element_end(self, name, qname): + self.parent.object = self.current.subject + + def property_element_start(self, name, qname, attrs): + name, atts = self.convert(name, qname, attrs) + current = self.current + absolutize = self.absolutize + + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + object = None + current.data = None + current.list = None + + if not name.startswith(str(RDFNS)): + current.predicate = absolutize(name) + elif name==RDF.li: + current.predicate = current.next_li() + elif name in PROPERTY_ELEMENT_EXCEPTIONS: + self.error("Invalid property element URI: %s" % name) + else: + current.predicate = absolutize(name) + + id = atts.get(RDF.ID, None) + if id is not None: + if not is_ncname(id): + self.error("rdf:ID value is not a value NCName: %s" % id) + current.id = absolutize("#%s" % id) + else: + current.id = None + + resource = atts.get(RDF.resource, None) + nodeID = atts.get(RDF.nodeID, None) + parse_type = atts.get(RDF.parseType, None) + if resource is not None and nodeID is not None: + self.error("Property element cannot have both rdf:nodeID and rdf:resource") + if resource is not None: + object = absolutize(resource) + next.start = self.node_element_start + next.end = self.node_element_end + elif nodeID is not None: + if not is_ncname(nodeID): + self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) + if self.preserve_bnode_ids is False: + if nodeID in self.bnode: + object = self.bnode[nodeID] + else: + subject = BNode() + self.bnode[nodeID] = subject + 
object = subject
+            else:
+                # preserve_bnode_ids: keep the document's node ID verbatim.
+                object = subject = BNode(nodeID)
+            next.start = self.node_element_start
+            next.end = self.node_element_end
+        else:
+            if parse_type is not None:
+                # rdf:parseType is exclusive with all other property
+                # attributes except rdf:ID.
+                for att in atts:
+                    if att!=RDF.parseType and att!=RDF.ID:
+                        # NOTE(review): message reads "now allowed here";
+                        # almost certainly meant "not allowed here" —
+                        # runtime string left unchanged in this doc pass.
+                        self.error("Property attr '%s' now allowed here" % att)
+                if parse_type=="Resource":
+                    # parseType="Resource": implicit blank node subject,
+                    # children are treated as property elements.
+                    current.subject = object = BNode()
+                    current.char = self.property_element_char
+                    next.start = self.property_element_start
+                    next.end = self.property_element_end
+                elif parse_type=="Collection":
+                    # parseType="Collection": build an rdf:List; starts as
+                    # rdf:nil until the first member arrives.
+                    current.char = None
+                    object = current.list = RDF.nil #BNode()#self.parent.subject
+                    next.start = self.node_element_start
+                    next.end = self.list_node_element_end
+                else: #if parse_type=="Literal":
+                    # All other values are treated as Literal
+                    # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
+                    object = Literal("", datatype=RDF.XMLLiteral)
+                    current.char = self.literal_element_char
+                    current.declared = {}
+                    next.start = self.literal_element_start
+                    next.char = self.literal_element_char
+                    next.end = self.literal_element_end
+                current.object = object
+                return
+            else:
+                # Plain property element: object comes from character data
+                # or a nested node element.
+                object = None
+                current.char = self.property_element_char
+                next.start = self.node_element_start
+                next.end = self.node_element_end
+
+        datatype = current.datatype = atts.get(RDF.datatype, None)
+        language = current.language
+        if datatype is not None:
+            # TODO: check that there are no atts other than datatype and id
+            datatype = absolutize(datatype)
+        else:
+            # Property attributes on a property element: each becomes a
+            # statement about an implicit blank-node object.
+            for att in atts:
+                # NOTE(review): the first and the final branch both compute
+                # predicate = absolutize(att); the split looks intentional
+                # only for ordering of the exception checks — confirm.
+                if not att.startswith(str(RDFNS)):
+                    predicate = absolutize(att)
+                elif att in PROPERTY_ELEMENT_ATTRIBUTES:
+                    continue
+                elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
+                    self.error("""Invalid property attribute URI: %s""" % att)
+                else:
+                    predicate = absolutize(att)
+
+                if att==RDF.type:
+                    o = URIRef(atts[att])
+                else:
+                    if datatype is not None:
+                        language = None
+                    o = Literal(atts[att], language, datatype)
+
+                if object is None:
+                    object = BNode()
+                self.store.add((object, predicate, o))
+        if object is 
None:
+            # No explicit object yet: collect character data as the value.
+            current.data = ""
+            current.object = None
+        else:
+            current.data = None
+            current.object = object
+
+    def property_element_char(self, data):
+        # Accumulate character content for a plain (non-XMLLiteral)
+        # property element.
+        current = self.current
+        if current.data is not None:
+            current.data += data
+
+    def property_element_end(self, name, qname):
+        # Close a property element: turn accumulated text into a Literal,
+        # emit the triple, and reify it if rdf:ID was given.
+        current = self.current
+        if current.data is not None and current.object is None:
+            literalLang = current.language
+            if current.datatype is not None:
+                # A datatyped literal carries no language tag.
+                literalLang = None
+            current.object = Literal(current.data, literalLang, current.datatype)
+            current.data = None
+        if self.next.end==self.list_node_element_end:
+            # Close an rdf:parseType="Collection" list with rdf:nil.
+            if current.object!=RDF.nil:
+                self.store.add((current.list, RDF.rest, RDF.nil))
+        if current.object is not None:
+            self.store.add((self.parent.subject, current.predicate, current.object))
+            if current.id is not None:
+                self.add_reified(current.id, (self.parent.subject,
+                                 current.predicate, current.object))
+        current.subject = None
+
+    def list_node_element_end(self, name, qname):
+        # A member of an rdf:parseType="Collection": append a fresh list
+        # cell holding the member's subject.
+        # NOTE(review): the local name `list` shadows the builtin; harmless
+        # inside this method but worth renaming in a code pass.
+        current = self.current
+        if self.parent.list==RDF.nil:
+            # First member: replace the provisional rdf:nil with a new cell.
+            list = BNode()
+            # Removed between 20030123 and 20030905
+            #self.store.add((list, RDF.type, LIST))
+            self.parent.list = list
+            self.store.add((self.parent.list, RDF.first, current.subject))
+            self.parent.object = list
+            self.parent.char = None
+        else:
+            # Subsequent member: chain a new cell via rdf:rest.
+            list = BNode()
+            # Removed between 20030123 and 20030905
+            #self.store.add((list, RDF.type, LIST))
+            self.store.add((self.parent.list, RDF.rest, list))
+            self.store.add((list, RDF.first, current.subject))
+            self.parent.list = list
+
+    def literal_element_start(self, name, qname, attrs):
+        # Inside rdf:parseType="Literal": serialize the element back to XML
+        # text, re-declaring any namespaces not already declared upstream.
+        current = self.current
+        self.next.start = self.literal_element_start
+        self.next.char = self.literal_element_char
+        self.next.end = self.literal_element_end
+        current.declared = self.parent.declared.copy()
+        if name[0]:
+            prefix = self._current_context[name[0]]
+            if prefix:
+                current.object = "<%s:%s" % (prefix, name[1])
+            else:
+                current.object = "<%s" % name[1]
+            if not name[0] in current.declared:
+                
current.declared[name[0]] = prefix + if prefix: + current.object += (' xmlns:%s="%s"' % (prefix, name[0])) + else: + current.object += (' xmlns="%s"' % name[0]) + else: + current.object = "<%s" % name[1] + + for (name, value) in attrs.items(): + if name[0]: + if not name[0] in current.declared: + current.declared[name[0]] = self._current_context[name[0]] + name = current.declared[name[0]] + ":" + name[1] + else: + name = name[1] + current.object += (' %s=%s' % (name, quoteattr(value))) + current.object += ">" + + def literal_element_char(self, data): + self.current.object += escape(data) + + def literal_element_end(self, name, qname): + if name[0]: + prefix = self._current_context[name[0]] + if prefix: + end = u"</%s:%s>" % (prefix, name[1]) + else: + end = u"</%s>" % name[1] + else: + end = u"</%s>" % name[1] + self.parent.object += self.current.object + end + + +def create_parser(target, store): + parser = make_parser() + try: + # Workaround for bug in expatreader.py. Needed when + # expatreader is trying to guess a prefix. 
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") + except AttributeError: + pass # Not present in Jython (at least) + parser.setFeature(handler.feature_namespaces, 1) + rdfxml = RDFXMLHandler(store) + rdfxml.setDocumentLocator(target) + #rdfxml.setDocumentLocator(_Locator(self.url, self.parser)) + parser.setContentHandler(rdfxml) + parser.setErrorHandler(ErrorHandler()) + return parser + + +class RDFXMLParser(Parser): + + def __init__(self): + pass + + def parse(self, source, sink, **args): + self._parser = create_parser(source, sink) + content_handler = self._parser.getContentHandler() + preserve_bnode_ids = args.get("preserve_bnode_ids", None) + if preserve_bnode_ids is not None: + content_handler.preserve_bnode_ids = preserve_bnode_ids + # We're only using it once now + #content_handler.reset() + #self._parser.reset() + self._parser.parse(source) + + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~ new file mode 100644 index 0000000..00e8d6a --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfxml.py~ @@ -0,0 +1,579 @@ +# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of Daniel Krech nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +""" +from xml.sax import make_parser +from xml.sax.handler import ErrorHandler +from xml.sax.saxutils import handler, quoteattr, escape +from urlparse import urljoin, urldefrag + +from rdflib.namespace import RDF, is_ncname +from rdflib.term import URIRef +from rdflib.term import BNode +from rdflib.term import Literal +from rdflib.exceptions import ParserError, Error +from rdflib.parser import Parser + +__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser'] + +RDFNS = RDF + +# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI +# A mapping from unqualified terms to there qualified version. 
+UNQUALIFIED = {"about" : RDF.about, + "ID" : RDF.ID, + "type" : RDF.type, + "resource": RDF.resource, + "parseType": RDF.parseType} + +# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms +CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype] + +# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms +SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + +# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms +OLD_TERMS = [ + URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"), + URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"), + URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")] + +NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS +NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about] + +PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS +PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS +PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID] + +XMLNS = "http://www.w3.org/XML/1998/namespace" +BASE = (XMLNS, "base") +LANG = (XMLNS, "lang") + + +class BagID(URIRef): + __slots__ = ['li'] + def __init__(self, val): + super(URIRef, self).__init__(val) + self.li = 0 + + def next_li(self): + self.li += 1 + return RDFNS[self.li] + + +class ElementHandler(object): + __slots__ = ['start', 'char', 'end', 'li', 'id', + 'base', 'subject', 'predicate', 'object', + 'list', 'language', 'datatype', 'declared', 'data'] + def __init__(self): + self.start = None + self.char = None + self.end = None + self.li = 0 + self.id = None + self.base = None + self.subject = None + self.object = None + self.list = None + self.language = None + self.datatype = None + self.declared = None + self.data = None + + def next_li(self): + self.li += 1 + return RDFNS[self.li] + + +class RDFXMLHandler(handler.ContentHandler): + + def __init__(self, store): + self.store = store + self.preserve_bnode_ids = 
False + self.reset() + + def reset(self): + document_element = ElementHandler() + document_element.start = self.document_element_start + document_element.end = lambda name, qname: None + self.stack = [None, document_element,] + self.ids = {} # remember IDs we have already seen + self.bnode = {} + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self.locator = locator + + def startDocument(self): + pass + + def startPrefixMapping(self, prefix, namespace): + self._ns_contexts.append(self._current_context.copy()) + self._current_context[namespace] = prefix + self.store.bind(prefix, URIRef(namespace), override=False) + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElementNS(self, name, qname, attrs): + stack = self.stack + stack.append(ElementHandler()) + current = self.current + parent = self.parent + base = attrs.get(BASE, None) + if base is not None: + base, frag = urldefrag(base) + if parent and parent.base: + base = urljoin(parent.base, base) + else: + systemId = self.locator.getPublicId() or self.locator.getSystemId() + if systemId: + base = urljoin(systemId, base) + else: + if parent: + base = parent.base + if base is None: + systemId = self.locator.getPublicId() or self.locator.getSystemId() + if systemId: + base, frag = urldefrag(systemId) + current.base = base + language = attrs.get(LANG, None) + if language is None: + if parent: + language = parent.language + current.language = language + current.start(name, qname, attrs) + + def endElementNS(self, name, qname): + self.current.end(name, qname) + self.stack.pop() + + def characters(self, content): + char = self.current.char + if char: + char(content) + + def ignorableWhitespace(self, content): + pass + + def processingInstruction(self, target, data): + pass + + def add_reified(self, sid, (s, p, o)): + 
self.store.add((sid, RDF.type, RDF.Statement)) + self.store.add((sid, RDF.subject, s)) + self.store.add((sid, RDF.predicate, p)) + self.store.add((sid, RDF.object, o)) + + def error(self, message): + locator = self.locator + info = "%s:%s:%s: " % (locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()) + raise ParserError(info + message) + + def get_current(self): + return self.stack[-2] + # Create a read only property called current so that self.current + # give the current element handler. + current = property(get_current) + + def get_next(self): + return self.stack[-1] + # Create a read only property that gives the element handler to be + # used for the next element. + next = property(get_next) + + def get_parent(self): + return self.stack[-3] + # Create a read only property that gives the current parent + # element handler + parent = property(get_parent) + + def absolutize(self, uri): + result = urljoin(self.current.base, uri, allow_fragments=1) + if uri and uri[-1]=="#" and result[-1]!="#": + result = "%s#" % result + return URIRef(result) + + def convert(self, name, qname, attrs): + if name[0] is None: + name = URIRef(name[1]) + else: + name = URIRef("".join(name)) + atts = {} + for (n, v) in attrs.items(): #attrs._attrs.iteritems(): # + if n[0] is None: + att = URIRef(n[1]) + else: + att = URIRef("".join(n)) + if att.startswith(XMLNS) or att[0:3].lower()=="xml": + pass + elif att in UNQUALIFIED: + #if not RDFNS[att] in atts: + atts[RDFNS[att]] = v + else: + atts[URIRef(att)] = v + return name, atts + + def document_element_start(self, name, qname, attrs): + if name[0] and URIRef("".join(name)) == RDF.RDF: + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + next.start = self.node_element_start + next.end = self.node_element_end + else: + self.node_element_start(name, qname, attrs) + #self.current.end = self.node_element_end + # TODO... 
set end to something that sets start such that + # another element will cause error + + + def node_element_start(self, name, qname, attrs): + name, atts = self.convert(name, qname, attrs) + current = self.current + absolutize = self.absolutize + + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + next.start = self.property_element_start + next.end = self.property_element_end + + if name in NODE_ELEMENT_EXCEPTIONS: + self.error("Invalid node element URI: %s" % name) + + if RDF.ID in atts: + if RDF.about in atts or RDF.nodeID in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + + id = atts[RDF.ID] + if not is_ncname(id): + self.error("rdf:ID value is not a valid NCName: %s" % id) + subject = absolutize("#%s" % id) + if subject in self.ids: + self.error("two elements cannot use the same ID: '%s'" % subject) + self.ids[subject] = 1 # IDs can only appear once within a document + elif RDF.nodeID in atts: + if RDF.ID in atts or RDF.about in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + nodeID = atts[RDF.nodeID] + if not is_ncname(nodeID): + self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) + if self.preserve_bnode_ids is False: + if nodeID in self.bnode: + subject = self.bnode[nodeID] + else: + subject = BNode() + self.bnode[nodeID] = subject + else: + subject = BNode(nodeID) + elif RDF.about in atts: + if RDF.ID in atts or RDF.nodeID in atts: + self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") + subject = absolutize(atts[RDF.about]) + else: + subject = BNode() + + if name!=RDF.Description: # S1 + self.store.add((subject, RDF.type, absolutize(name))) + + language = current.language + for att in atts: + if not att.startswith(str(RDFNS)): + predicate = absolutize(att) + try: + object = Literal(atts[att], language) + except Error, e: + self.error(e.msg) + elif att==RDF.type: #S2 + predicate = RDF.type + object = 
absolutize(atts[RDF.type]) + elif att in NODE_ELEMENT_ATTRIBUTES: + continue + elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3 + self.error("Invalid property attribute URI: %s" % att) + continue # for when error does not throw an exception + else: + predicate = absolutize(att) + try: + object = Literal(atts[att], language) + except Error, e: + self.error(e.msg) + self.store.add((subject, predicate, object)) + + current.subject = subject + + + def node_element_end(self, name, qname): + self.parent.object = self.current.subject + + def property_element_start(self, name, qname, attrs): + name, atts = self.convert(name, qname, attrs) + current = self.current + absolutize = self.absolutize + + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, 'next') + object = None + current.data = None + current.list = None + + if not name.startswith(str(RDFNS)): + current.predicate = absolutize(name) + elif name==RDF.li: + current.predicate = current.next_li() + elif name in PROPERTY_ELEMENT_EXCEPTIONS: + self.error("Invalid property element URI: %s" % name) + else: + current.predicate = absolutize(name) + + id = atts.get(RDF.ID, None) + if id is not None: + if not is_ncname(id): + self.error("rdf:ID value is not a value NCName: %s" % id) + current.id = absolutize("#%s" % id) + else: + current.id = None + + resource = atts.get(RDF.resource, None) + nodeID = atts.get(RDF.nodeID, None) + parse_type = atts.get(RDF.parseType, None) + if resource is not None and nodeID is not None: + self.error("Property element cannot have both rdf:nodeID and rdf:resource") + if resource is not None: + object = absolutize(resource) + next.start = self.node_element_start + next.end = self.node_element_end + elif nodeID is not None: + if not is_ncname(nodeID): + self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) + if self.preserve_bnode_ids is False: + if nodeID in self.bnode: + object = self.bnode[nodeID] + else: + subject = BNode() + self.bnode[nodeID] = subject + 
object = subject + else: + object = subject = BNode(nodeID) + next.start = self.node_element_start + next.end = self.node_element_end + else: + if parse_type is not None: + for att in atts: + if att!=RDF.parseType and att!=RDF.ID: + self.error("Property attr '%s' now allowed here" % att) + if parse_type=="Resource": + current.subject = object = BNode() + current.char = self.property_element_char + next.start = self.property_element_start + next.end = self.property_element_end + elif parse_type=="Collection": + current.char = None + object = current.list = RDF.nil #BNode()#self.parent.subject + next.start = self.node_element_start + next.end = self.list_node_element_end + else: #if parse_type=="Literal": + # All other values are treated as Literal + # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt + object = Literal("", datatype=RDF.XMLLiteral) + current.char = self.literal_element_char + current.declared = {} + next.start = self.literal_element_start + next.char = self.literal_element_char + next.end = self.literal_element_end + current.object = object + return + else: + object = None + current.char = self.property_element_char + next.start = self.node_element_start + next.end = self.node_element_end + + datatype = current.datatype = atts.get(RDF.datatype, None) + language = current.language + if datatype is not None: + # TODO: check that there are no atts other than datatype and id + datatype = absolutize(datatype) + else: + for att in atts: + if not att.startswith(str(RDFNS)): + predicate = absolutize(att) + elif att in PROPERTY_ELEMENT_ATTRIBUTES: + continue + elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: + self.error("""Invalid property attribute URI: %s""" % att) + else: + predicate = absolutize(att) + + if att==RDF.type: + o = URIRef(atts[att]) + else: + if datatype is not None: + language = None + o = Literal(atts[att], language, datatype) + + if object is None: + object = BNode() + self.store.add((object, predicate, o)) + if object is 
None: + current.data = "" + current.object = None + else: + current.data = None + current.object = object + + def property_element_char(self, data): + current = self.current + if current.data is not None: + current.data += data + + def property_element_end(self, name, qname): + current = self.current + if current.data is not None and current.object is None: + literalLang = current.language + if current.datatype is not None: + literalLang = None + current.object = Literal(current.data, literalLang, current.datatype) + current.data = None + if self.next.end==self.list_node_element_end: + if current.object!=RDF.nil: + self.store.add((current.list, RDF.rest, RDF.nil)) + if current.object is not None: + self.store.add((self.parent.subject, current.predicate, current.object)) + if current.id is not None: + self.add_reified(current.id, (self.parent.subject, + current.predicate, current.object)) + current.subject = None + + def list_node_element_end(self, name, qname): + current = self.current + if self.parent.list==RDF.nil: + list = BNode() + # Removed between 20030123 and 20030905 + #self.store.add((list, RDF.type, LIST)) + self.parent.list = list + self.store.add((self.parent.list, RDF.first, current.subject)) + self.parent.object = list + self.parent.char = None + else: + list = BNode() + # Removed between 20030123 and 20030905 + #self.store.add((list, RDF.type, LIST)) + self.store.add((self.parent.list, RDF.rest, list)) + self.store.add((list, RDF.first, current.subject)) + self.parent.list = list + + def literal_element_start(self, name, qname, attrs): + current = self.current + self.next.start = self.literal_element_start + self.next.char = self.literal_element_char + self.next.end = self.literal_element_end + current.declared = self.parent.declared.copy() + if name[0]: + prefix = self._current_context[name[0]] + if prefix: + current.object = "<%s:%s" % (prefix, name[1]) + else: + current.object = "<%s" % name[1] + if not name[0] in current.declared: + 
current.declared[name[0]] = prefix + if prefix: + current.object += (' xmlns:%s="%s"' % (prefix, name[0])) + else: + current.object += (' xmlns="%s"' % name[0]) + else: + current.object = "<%s" % name[1] + + for (name, value) in attrs.items(): + if name[0]: + if not name[0] in current.declared: + current.declared[name[0]] = self._current_context[name[0]] + name = current.declared[name[0]] + ":" + name[1] + else: + name = name[1] + current.object += (' %s=%s' % (name, quoteattr(value))) + current.object += ">" + + def literal_element_char(self, data): + self.current.object += escape(data) + + def literal_element_end(self, name, qname): + if name[0]: + prefix = self._current_context[name[0]] + if prefix: + end = u"</%s:%s>" % (prefix, name[1]) + else: + end = u"</%s>" % name[1] + else: + end = u"</%s>" % name[1] + self.parent.object += self.current.object + end + + +def create_parser(target, store): + parser = make_parser() + try: + # Workaround for bug in expatreader.py. Needed when + # expatreader is trying to guess a prefix. 
+ parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") + except AttributeError: + pass # Not present in Jython (at least) + parser.setFeature(handler.feature_namespaces, 1) + rdfxml = RDFXMLHandler(store) + rdfxml.setDocumentLocator(target) + #rdfxml.setDocumentLocator(_Locator(self.url, self.parser)) + parser.setContentHandler(rdfxml) + parser.setErrorHandler(ErrorHandler()) + return parser + + +class RDFXMLParser(Parser): + + def __init__(self): + pass + + def parse(self, source, sink, **args): + self._parser = create_parser(source, sink) + content_handler = self._parser.getContentHandler() + preserve_bnode_ids = args.get("preserve_bnode_ids", None) + if preserve_bnode_ids is not None: + content_handler.preserve_bnode_ids = preserve_bnode_ids + # We're only using it once now + #content_handler.reset() + #self._parser.reset() + self._parser.parse(source) + + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py new file mode 100644 index 0000000..9a1c3ba --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py @@ -0,0 +1,286 @@ +# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of Daniel Krech nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +""" +from rdflib_.namespace import Namespace +from rdflib_.term import URIRef +from rdflib_.term import BNode +from rdflib_.term import Literal +from rdflib_.graph import Graph, ConjunctiveGraph +from rdflib_.exceptions import ParserError +from rdflib_.parser import Parser + +from xml.sax.saxutils import handler +from xml.sax import make_parser +from xml.sax.handler import ErrorHandler + +__all__ = ['create_parser', 'TriXHandler', 'TriXParser'] + + +TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") +XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") + +class TriXHandler(handler.ContentHandler): + """An Sax Handler for TriX. 
See http://sw.nokia.com/trix/""" + + def __init__(self, store): + self.store = store + self.preserve_bnode_ids = False + self.reset() + + def reset(self): + self.bnode = {} + self.graph=self.store + self.triple=None + self.state=0 + self.lang=None + self.datatype=None + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self.locator = locator + + def startDocument(self): + pass + + def startPrefixMapping(self, prefix, namespace): + pass + + def endPrefixMapping(self, prefix): + pass + + def startElementNS(self, name, qname, attrs): + + if name[0]!=str(TRIXNS): + self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS)) + + if name[1]=="TriX": + if self.state==0: + self.state=1 + else: + self.error("Unexpected TriX element") + + elif name[1]=="graph": + if self.state==1: + self.state=2 + else: + self.error("Unexpected graph element") + + elif name[1]=="uri": + if self.state==2: + # the context uri + self.state=3 + elif self.state==4: + # part of a triple + pass + else: + self.error("Unexpected uri element") + + elif name[1]=="triple": + if self.state==2: + if self.graph==None: + # anonymous graph, create one with random bnode id + self.graph=Graph(store=self.store.store) + # start of a triple + self.triple=[] + self.state=4 + else: + self.error("Unexpected triple element") + + elif name[1]=="typedLiteral": + if self.state==4: + # part of triple + self.lang=None + self.datatype=None + + try: + self.lang=attrs.getValue((unicode(XMLNS), u"lang")) + except: + # language not required - ignore + pass + try: + self.datatype=attrs.getValueByQName(u"datatype") + except KeyError: + self.error("No required attribute 'datatype'") + else: + self.error("Unexpected typedLiteral element") + + elif name[1]=="plainLiteral": + if self.state==4: + # part of triple + self.lang=None + self.datatype=None + try: + self.lang=attrs.getValue((unicode(XMLNS), u"lang")) + except: + # language not required - ignore + pass + + else: + 
self.error("Unexpected plainLiteral element") + + elif name[1]=="id": + if self.state==2: + # the context uri + self.state=3 + + elif self.state==4: + # part of triple + pass + else: + self.error("Unexpected id element") + + else: + self.error("Unknown element %s in TriX namespace"%name[1]) + + self.chars="" + + + def endElementNS(self, name, qname): + if name[0]!=str(TRIXNS): + self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS)) + + if name[1]=="uri": + if self.state==3: + self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip())) + self.state=2 + elif self.state==4: + self.triple+=[URIRef(self.chars.strip())] + else: + self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="id": + if self.state==3: + self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip())) + self.state=2 + elif self.state==4: + self.triple+=[self.get_bnode(self.chars.strip())] + else: + self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="plainLiteral" or name[1]=="typedLiteral": + if self.state==4: + self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)] + else: + self.error("This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="triple": + if self.state==4: + if len(self.triple)!=3: + self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple)) + + self.graph.add(self.triple) + #self.store.store.add(self.triple,context=self.graph) + #self.store.addN([self.triple+[self.graph]]) + self.state=2 + else: + self.error("This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="graph": + self.graph=None + self.state=1 + + elif name[1]=="TriX": + self.state=0 + + else: + self.error("Unexpected close element") + + + def 
get_bnode(self,label): + if self.preserve_bnode_ids: + bn=BNode(label) + else: + if label in self.bnode: + bn=self.bnode[label] + else: + bn=BNode(label) + self.bnode[label]=bn + return bn + + + def characters(self, content): + self.chars+=content + + + def ignorableWhitespace(self, content): + pass + + def processingInstruction(self, target, data): + pass + + + def error(self, message): + locator = self.locator + info = "%s:%s:%s: " % (locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()) + raise ParserError(info + message) + + +def create_parser(store): + parser = make_parser() + try: + # Workaround for bug in expatreader.py. Needed when + # expatreader is trying to guess a prefix. + parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") + except AttributeError: + pass # Not present in Jython (at least) + parser.setFeature(handler.feature_namespaces, 1) + trix = TriXHandler(store) + parser.setContentHandler(trix) + parser.setErrorHandler(ErrorHandler()) + return parser + + +class TriXParser(Parser): + """A parser for TriX. See http://sw.nokia.com/trix/""" + + def __init__(self): + pass + + def parse(self, source, sink, **args): + assert sink.store.context_aware + g=ConjunctiveGraph(store=sink.store) + + self._parser = create_parser(g) + content_handler = self._parser.getContentHandler() + preserve_bnode_ids = args.get("preserve_bnode_ids", None) + if preserve_bnode_ids is not None: + content_handler.preserve_bnode_ids = preserve_bnode_ids + # We're only using it once now + #content_handler.reset() + #self._parser.reset() + self._parser.parse(source) + + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~ new file mode 100644 index 0000000..0c2e708 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/parsers/trix.py~ @@ -0,0 +1,286 @@ +# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of Daniel Krech nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +""" +""" +from rdflib.namespace import Namespace +from rdflib.term import URIRef +from rdflib.term import BNode +from rdflib.term import Literal +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.exceptions import ParserError +from rdflib.parser import Parser + +from xml.sax.saxutils import handler +from xml.sax import make_parser +from xml.sax.handler import ErrorHandler + +__all__ = ['create_parser', 'TriXHandler', 'TriXParser'] + + +TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") +XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") + +class TriXHandler(handler.ContentHandler): + """An Sax Handler for TriX. See http://sw.nokia.com/trix/""" + + def __init__(self, store): + self.store = store + self.preserve_bnode_ids = False + self.reset() + + def reset(self): + self.bnode = {} + self.graph=self.store + self.triple=None + self.state=0 + self.lang=None + self.datatype=None + + # ContentHandler methods + + def setDocumentLocator(self, locator): + self.locator = locator + + def startDocument(self): + pass + + def startPrefixMapping(self, prefix, namespace): + pass + + def endPrefixMapping(self, prefix): + pass + + def startElementNS(self, name, qname, attrs): + + if name[0]!=str(TRIXNS): + self.error("Only elements in the TriX namespace are allowed. 
%s!=%s"%(name[0],TRIXNS)) + + if name[1]=="TriX": + if self.state==0: + self.state=1 + else: + self.error("Unexpected TriX element") + + elif name[1]=="graph": + if self.state==1: + self.state=2 + else: + self.error("Unexpected graph element") + + elif name[1]=="uri": + if self.state==2: + # the context uri + self.state=3 + elif self.state==4: + # part of a triple + pass + else: + self.error("Unexpected uri element") + + elif name[1]=="triple": + if self.state==2: + if self.graph==None: + # anonymous graph, create one with random bnode id + self.graph=Graph(store=self.store.store) + # start of a triple + self.triple=[] + self.state=4 + else: + self.error("Unexpected triple element") + + elif name[1]=="typedLiteral": + if self.state==4: + # part of triple + self.lang=None + self.datatype=None + + try: + self.lang=attrs.getValue((unicode(XMLNS), u"lang")) + except: + # language not required - ignore + pass + try: + self.datatype=attrs.getValueByQName(u"datatype") + except KeyError: + self.error("No required attribute 'datatype'") + else: + self.error("Unexpected typedLiteral element") + + elif name[1]=="plainLiteral": + if self.state==4: + # part of triple + self.lang=None + self.datatype=None + try: + self.lang=attrs.getValue((unicode(XMLNS), u"lang")) + except: + # language not required - ignore + pass + + else: + self.error("Unexpected plainLiteral element") + + elif name[1]=="id": + if self.state==2: + # the context uri + self.state=3 + + elif self.state==4: + # part of triple + pass + else: + self.error("Unexpected id element") + + else: + self.error("Unknown element %s in TriX namespace"%name[1]) + + self.chars="" + + + def endElementNS(self, name, qname): + if name[0]!=str(TRIXNS): + self.error("Only elements in the TriX namespace are allowed. 
%s!=%s"%(name[0], TRIXNS)) + + if name[1]=="uri": + if self.state==3: + self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip())) + self.state=2 + elif self.state==4: + self.triple+=[URIRef(self.chars.strip())] + else: + self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="id": + if self.state==3: + self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip())) + self.state=2 + elif self.state==4: + self.triple+=[self.get_bnode(self.chars.strip())] + else: + self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="plainLiteral" or name[1]=="typedLiteral": + if self.state==4: + self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)] + else: + self.error("This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="triple": + if self.state==4: + if len(self.triple)!=3: + self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple)) + + self.graph.add(self.triple) + #self.store.store.add(self.triple,context=self.graph) + #self.store.addN([self.triple+[self.graph]]) + self.state=2 + else: + self.error("This should never happen if the SAX parser ensures XML syntax correctness") + + elif name[1]=="graph": + self.graph=None + self.state=1 + + elif name[1]=="TriX": + self.state=0 + + else: + self.error("Unexpected close element") + + + def get_bnode(self,label): + if self.preserve_bnode_ids: + bn=BNode(label) + else: + if label in self.bnode: + bn=self.bnode[label] + else: + bn=BNode(label) + self.bnode[label]=bn + return bn + + + def characters(self, content): + self.chars+=content + + + def ignorableWhitespace(self, content): + pass + + def processingInstruction(self, target, data): + pass + + + def error(self, message): + locator = self.locator + info = "%s:%s:%s: " % 
(locator.getSystemId(), + locator.getLineNumber(), locator.getColumnNumber()) + raise ParserError(info + message) + + +def create_parser(store): + parser = make_parser() + try: + # Workaround for bug in expatreader.py. Needed when + # expatreader is trying to guess a prefix. + parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") + except AttributeError: + pass # Not present in Jython (at least) + parser.setFeature(handler.feature_namespaces, 1) + trix = TriXHandler(store) + parser.setContentHandler(trix) + parser.setErrorHandler(ErrorHandler()) + return parser + + +class TriXParser(Parser): + """A parser for TriX. See http://sw.nokia.com/trix/""" + + def __init__(self): + pass + + def parse(self, source, sink, **args): + assert sink.store.context_aware + g=ConjunctiveGraph(store=sink.store) + + self._parser = create_parser(g) + content_handler = self._parser.getContentHandler() + preserve_bnode_ids = args.get("preserve_bnode_ids", None) + if preserve_bnode_ids is not None: + content_handler.preserve_bnode_ids = preserve_bnode_ids + # We're only using it once now + #content_handler.reset() + #self._parser.reset() + self._parser.parse(source) + + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/__init__.py diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py new file mode 100644 index 0000000..c7177c1 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py @@ -0,0 +1,123 @@ +""" +Notation 3 (N3) RDF graph serializer for RDFLib. 
+""" +from rdflib_.graph import Graph +from rdflib_.namespace import Namespace, OWL +from rdflib_.plugins.serializers.turtle import (TurtleSerializer, + SUBJECT, VERB, OBJECT) + +__all__ = ['N3Serializer'] + +SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#") + + +class N3Serializer(TurtleSerializer): + + short_name = "n3" + + def __init__(self, store, parent=None): + super(N3Serializer, self).__init__(store) + self.keywords.update({ + OWL.sameAs: '=', + SWAP_LOG.implies: '=>' + }) + self.parent = parent + + def reset(self): + super(N3Serializer, self).reset() + self._stores = {} + + def subjectDone(self, subject): + super(N3Serializer, self).subjectDone(subject) + if self.parent: + self.parent.subjectDone(subject) + + def isDone(self, subject): + return (super(N3Serializer, self).isDone(subject) + and (not self.parent or self.parent.isDone(subject))) + + def startDocument(self): + super(N3Serializer, self).startDocument() + #if not isinstance(self.store, N3Store): + # return + # + #all_list = [self.label(var) for var in + # self.store.get_universals(recurse=False)] + #all_list.sort() + #some_list = [self.label(var) for var in + # self.store.get_existentials(recurse=False)] + #some_list.sort() + # + #for var in all_list: + # self.write('\n'+self.indent()+'@forAll %s. '%var) + #for var in some_list: + # self.write('\n'+self.indent()+'@forSome %s. 
'%var) + # + #if (len(all_list) + len(some_list)) > 0: + # self.write('\n') + + def endDocument(self): + if not self.parent: + super(N3Serializer, self).endDocument() + + def indent(self, modifier=0): + indent = super(N3Serializer, self).indent(modifier) + if self.parent is not None: + indent += self.parent.indent()#modifier) + return indent + + def preprocessTriple(self, triple): + super(N3Serializer, self).preprocessTriple(triple) + if isinstance(triple[0], Graph): + for t in triple[0]: + self.preprocessTriple(t) + if isinstance(triple[2], Graph): + for t in triple[2]: + self.preprocessTriple(t) + + def getQName(self, uri, gen_prefix=True): + qname = None + if self.parent is not None: + qname = self.parent.getQName(uri, gen_prefix) + if qname is None: + qname = super(N3Serializer, self).getQName(uri, gen_prefix) + return qname + + def statement(self, subject): + self.subjectDone(subject) + properties = self.buildPredicateHash(subject) + if len(properties) == 0: + return False + return (self.s_clause(subject) + or super(N3Serializer, self).statement(subject)) + + def path(self, node, position, newline=False): + if not self.p_clause(node, position): + super(N3Serializer, self).path(node, position, newline) + + def s_clause(self, subject): + if isinstance(subject, Graph): + self.write('\n'+self.indent()) + self.p_clause(subject, SUBJECT) + self.predicateList(subject) + self.write(' .') + return True + else: + return False + + def p_clause(self, node, position): + if isinstance(node, Graph): + self.subjectDone(node) + if position is OBJECT: + self.write(' ') + self.write('{') + self.depth += 1 + serializer = N3Serializer(node, parent=self) + serializer.serialize(self.stream) + self.depth -= 1 + self.write(self.indent()+'}') + return True + else: + return False + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/n3.py~ new file mode 100644 index 0000000..63faf9d --- /dev/null +++ 
# NOTE(review): this is an editor backup file ("n3.py~") that appears to
# have been committed by accident.  It duplicates n3.py but imports from
# "rdflib" instead of the vendored "rdflib_" — consider deleting it.
"""
Notation 3 (N3) RDF graph serializer for RDFLib.
"""
from rdflib.graph import Graph
from rdflib.namespace import Namespace, OWL
from rdflib.plugins.serializers.turtle import (TurtleSerializer,
                                               SUBJECT, VERB, OBJECT)

__all__ = ['N3Serializer']

SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")


class N3Serializer(TurtleSerializer):
    # Turtle serializer extended with N3 syntax: the '=' / '=>' keyword
    # shorthands and '{ ... }' formula quoting via child serializers.

    short_name = "n3"

    def __init__(self, store, parent=None):
        super(N3Serializer, self).__init__(store)
        # N3 keyword shorthands added to the Turtle defaults.
        self.keywords.update({
            OWL.sameAs: '=',
            SWAP_LOG.implies: '=>'
        })
        # Enclosing serializer when this instance renders a nested formula.
        self.parent = parent

    def reset(self):
        super(N3Serializer, self).reset()
        self._stores = {}

    def subjectDone(self, subject):
        # Propagate the "done" mark to every enclosing serializer.
        super(N3Serializer, self).subjectDone(subject)
        if self.parent:
            self.parent.subjectDone(subject)

    def isDone(self, subject):
        # Done only if this serializer and all ancestors agree.
        return (super(N3Serializer, self).isDone(subject)
                and (not self.parent or self.parent.isDone(subject)))

    def startDocument(self):
        super(N3Serializer, self).startDocument()
        #if not isinstance(self.store, N3Store):
        #    return
        #
        #all_list = [self.label(var) for var in
        #            self.store.get_universals(recurse=False)]
        #all_list.sort()
        #some_list = [self.label(var) for var in
        #             self.store.get_existentials(recurse=False)]
        #some_list.sort()
        #
        #for var in all_list:
        #    self.write('\n'+self.indent()+'@forAll %s. '%var)
        #for var in some_list:
        #    self.write('\n'+self.indent()+'@forSome %s. '%var)
        #
        #if (len(all_list) + len(some_list)) > 0:
        #    self.write('\n')

    def endDocument(self):
        # Only the outermost serializer closes the document.
        if not self.parent:
            super(N3Serializer, self).endDocument()

    def indent(self, modifier=0):
        # Nested formulae indent relative to the enclosing serializer.
        indent = super(N3Serializer, self).indent(modifier)
        if self.parent is not None:
            indent += self.parent.indent()#modifier)
        return indent

    def preprocessTriple(self, triple):
        super(N3Serializer, self).preprocessTriple(triple)
        # Recurse into quoted graphs used as subject or object.
        if isinstance(triple[0], Graph):
            for t in triple[0]:
                self.preprocessTriple(t)
        if isinstance(triple[2], Graph):
            for t in triple[2]:
                self.preprocessTriple(t)

    def getQName(self, uri, gen_prefix=True):
        # Prefer the parent's prefix mapping so nested formulae reuse it.
        qname = None
        if self.parent is not None:
            qname = self.parent.getQName(uri, gen_prefix)
        if qname is None:
            qname = super(N3Serializer, self).getQName(uri, gen_prefix)
        return qname

    def statement(self, subject):
        self.subjectDone(subject)
        properties = self.buildPredicateHash(subject)
        if len(properties) == 0:
            return False
        return (self.s_clause(subject)
                or super(N3Serializer, self).statement(subject))

    def path(self, node, position, newline=False):
        # Formula nodes are handled by p_clause; all else falls to Turtle.
        if not self.p_clause(node, position):
            super(N3Serializer, self).path(node, position, newline)

    def s_clause(self, subject):
        # Emit "{ ... } pred obj ." when the subject itself is a formula.
        if isinstance(subject, Graph):
            self.write('\n'+self.indent())
            self.p_clause(subject, SUBJECT)
            self.predicateList(subject)
            self.write(' .')
            return True
        else:
            return False

    def p_clause(self, node, position):
        # Emit a quoted formula "{ ... }" via a child serializer.
        if isinstance(node, Graph):
            self.subjectDone(node)
            if position is OBJECT:
                self.write(' ')
            self.write('{')
            self.depth += 1
            serializer = N3Serializer(node, parent=self)
            serializer.serialize(self.stream)
            self.depth -= 1
            self.write(self.indent()+'}')
            return True
        else:
            return False
+++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py @@ -0,0 +1,34 @@ +import warnings + +from rdflib_.serializer import Serializer +from rdflib_.py3compat import b + +from rdflib_.plugins.serializers.nt import _xmlcharref_encode + +__all__ = ['NQuadsSerializer'] + +class NQuadsSerializer(Serializer): + + def __init__(self, store): + if not store.context_aware: + raise Exception("NQuads serialization only makes sense for context-aware stores!") + + super(NQuadsSerializer, self).__init__(store) + + def serialize(self, stream, base=None, encoding=None, **args): + if base is not None: + warnings.warn("NQuadsSerializer does not support base.") + if encoding is not None: + warnings.warn("NQuadsSerializer does not use custom encoding.") + encoding = self.encoding + for context in self.store.contexts(): + for triple in context: + stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace")) + stream.write(b("\n")) + +def _nq_row(triple,context): + return u"%s %s %s %s .\n" % (triple[0].n3(), + triple[1].n3(), + _xmlcharref_encode(triple[2].n3()), + context.n3()) + diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~ new file mode 100644 index 0000000..29e0dff --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/nquads.py~ @@ -0,0 +1,34 @@ +import warnings + +from rdflib.serializer import Serializer +from rdflib.py3compat import b + +from rdflib.plugins.serializers.nt import _xmlcharref_encode + +__all__ = ['NQuadsSerializer'] + +class NQuadsSerializer(Serializer): + + def __init__(self, store): + if not store.context_aware: + raise Exception("NQuads serialization only makes sense for context-aware stores!") + + super(NQuadsSerializer, self).__init__(store) + + def serialize(self, stream, base=None, encoding=None, **args): + if base is not None: + warnings.warn("NQuadsSerializer does not support base.") + if encoding is 
"""
N-Triples RDF graph serializer for RDFLib.
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
format.
"""
import warnings

from rdflib_.serializer import Serializer
from rdflib_.py3compat import b

__all__ = ['NTSerializer']


class NTSerializer(Serializer):
    """Serializes RDF graphs to NTriples format."""

    def serialize(self, stream, base=None, encoding=None, **args):
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        if encoding is not None:
            warnings.warn("NTSerializer does not use custom encoding.")
        # The serializer's own encoding always wins.
        encoding = self.encoding
        for triple in self.store:
            stream.write(_nt_row(triple).encode(encoding, "replace"))
        stream.write(b("\n"))


def _nt_row(triple):
    # Only the object position may hold a literal that needs escaping.
    subj, pred, obj = triple
    return u"%s %s %s .\n" % (subj.n3(),
                              pred.n3(),
                              _xmlcharref_encode(obj.n3()))


# from <http://code.activestate.com/recipes/303668/>
def _xmlcharref_encode(unicode_data, encoding="ascii"):
    """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
    # Literal newlines become the two-character escape \n.
    unicode_data = unicode_data.replace("\n", "\\n")

    if unicode_data.startswith('"""'):
        # Rewrite a long (triple-quoted) N3 literal as an ordinary
        # double-quoted literal (Bernhard Schandl's patch).
        cut = unicode_data.rfind('"""')
        payload = unicode_data[3:cut]
        trail = unicode_data[cut + 3:]

        # Un-escape quote runs that were escaped for the long form.
        payload = payload.replace('\\"""', '"""')
        # Corner case: a payload ending in '"' arrives pre-escaped;
        # de-escape so the next step does not double-escape it.
        if payload.endswith('\\"'):
            payload = payload.replace('\\"', '"')
        # Escape all quotes for the short form.
        payload = payload.replace('"', '\\"')

        # Reconstruct using single (short-form) quotes.
        unicode_data = '"%s"%s' % (payload, trail)

    # Escape anything the target encoding rejects as \uXXXX / \UXXXXXXXX.
    chars = []
    for char in unicode_data:
        try:
            char.encode(encoding, 'strict')
        except UnicodeError:
            if ord(char) <= 0xFFFF:
                chars.append('\\u%04X' % ord(char))
            else:
                chars.append('\\U%08X' % ord(char))
        else:
            chars.append(char)
    return ''.join(chars)
    """
    # NOTE(review): this class continues "nt.py~", an editor backup
    # duplicating nt.py with "rdflib" imports; consider removing the file.

    def serialize(self, stream, base=None, encoding=None, **args):
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        if encoding is not None:
            warnings.warn("NTSerializer does not use custom encoding.")
        # The serializer's own encoding always wins.
        encoding = self.encoding
        for triple in self.store:
            stream.write(_nt_row(triple).encode(encoding, "replace"))
        stream.write(b("\n"))


def _nt_row(triple):
    # Only the object position may hold a literal that needs escaping.
    return u"%s %s %s .\n" % (triple[0].n3(),
                              triple[1].n3(),
                              _xmlcharref_encode(triple[2].n3()))

# from <http://code.activestate.com/recipes/303668/>
def _xmlcharref_encode(unicode_data, encoding="ascii"):
    """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
    chars = []

    # nothing to do about xmlchars, but replace newlines with escapes:
    unicode_data=unicode_data.replace("\n","\\n")
    if unicode_data.startswith('"""'):
        # Updated with Bernhard Schandl's patch...
        # unicode_data = unicode_data.replace('"""', '"')  # original

        # Rewrite a long (triple-quoted) literal as a short one.
        last_triplequote_pos = unicode_data.rfind('"""')
        payload = unicode_data[3:last_triplequote_pos]
        trail = unicode_data[last_triplequote_pos+3:]

        # fix three-quotes encoding
        payload = payload.replace('\\"""', '"""')

        # corner case: if string ends with " it is already encoded.
        # so we need to de-escape it before it will be re-escaped in the next step.
        if payload.endswith('\\"'):
            payload = payload.replace('\\"', '"')

        # escape quotes in payload
        payload = payload.replace('"', '\\"')

        # reconstruct result using single quotes
        unicode_data = '"%s"%s' % (payload, trail)

    # Step through the unicode_data string one character at a time in
    # order to catch unencodable characters:
    for char in unicode_data:
        try:
            char.encode(encoding, 'strict')
        except UnicodeError:
            # \uXXXX for BMP code points, \UXXXXXXXX beyond.
            if ord(char) <= 0xFFFF:
                chars.append('\\u%04X' % ord(char))
            else:
                chars.append('\\U%08X' % ord(char))
        else:
            chars.append(char)

    return ''.join(chars)

# --- rdfxml.py -------------------------------------------------------------
from __future__ import generators

from rdflib_.plugins.serializers.xmlwriter import XMLWriter

from rdflib_.namespace import Namespace, RDF, RDFS, split_uri

from rdflib_.term import URIRef, Literal, BNode
from rdflib_.util import first, more_than
from rdflib_.collection import Collection
from rdflib_.serializer import Serializer

from rdflib_.exceptions import Error

from rdflib_.py3compat import b

from xml.sax.saxutils import quoteattr, escape

__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']

class XMLSerializer(Serializer):
    # Flat (non-pretty) RDF/XML serializer: one rdf:Description per subject.

    def __init__(self, store):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self):
        # Yield (prefix, namespace) pairs for every predicate in the store,
        # forcing the conventional "rdf" prefix to the RDF namespace.
        store = self.store
        nm = store.namespace_manager
        bindings = {}
        for predicate in set(store.predicates()):
            prefix, namespace, name = nm.compute_qname(predicate)
            bindings[prefix] = URIRef(namespace)
        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        if "rdf" in bindings:
            assert bindings["rdf"]==RDFNS
        else:
            bindings["rdf"] = RDFNS
        for prefix, namespace in bindings.iteritems():
            yield prefix, namespace
namespace + + + def serialize(self, stream, base=None, encoding=None, **args): + self.base = base + self.__stream = stream + self.__serialized = {} + encoding = self.encoding + self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace')) + + # startDocument + write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding) + + # startRDF + write('<rdf:RDF\n') + # If provided, write xml:base attribute for the RDF + if "xml_base" in args: + write(' xml:base="%s"\n' % args['xml_base']) + # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf') + bindings = list(self.__bindings()) + bindings.sort() + for prefix, namespace in bindings: + if prefix: + write(' xmlns:%s="%s"\n' % (prefix, namespace)) + else: + write(' xmlns="%s"\n' % namespace) + write('>\n') + + # write out triples by subject + for subject in self.store.subjects(): + self.subject(subject, 1) + + # endRDF + write( "</rdf:RDF>\n" ) + + # Set to None so that the memory can get garbage collected. 
+ #self.__serialized = None + del self.__serialized + + + def subject(self, subject, depth=1): + if not subject in self.__serialized: + self.__serialized[subject] = 1 + if isinstance(subject, (BNode,URIRef)): + write = self.write + indent = " " * depth + element_name = "rdf:Description" + if isinstance(subject, BNode): + write( '%s<%s rdf:nodeID="%s"' % + (indent, element_name, subject)) + else: + uri = quoteattr(self.relativize(subject)) + write( "%s<%s rdf:about=%s" % (indent, element_name, uri)) + if (subject, None, None) in self.store: + write( ">\n" ) + for predicate, object in self.store.predicate_objects(subject): + self.predicate(predicate, object, depth+1) + write( "%s</%s>\n" % (indent, element_name)) + else: + write( "/>\n" ) + + def predicate(self, predicate, object, depth=1): + write = self.write + indent = " " * depth + qname = self.store.namespace_manager.qname(predicate) + if isinstance(object, Literal): + attributes = "" + if object.language: + attributes += ' xml:lang="%s"'%object.language + + if object.datatype: + attributes += ' rdf:datatype="%s"'%object.datatype + + write("%s<%s%s>%s</%s>\n" % + (indent, qname, attributes, + escape(object), qname) ) + else: + if isinstance(object, BNode): + write('%s<%s rdf:nodeID="%s"/>\n' % + (indent, qname, object)) + else: + write("%s<%s rdf:resource=%s/>\n" % + (indent, qname, quoteattr(self.relativize(object)))) + + + +XMLLANG = "http://www.w3.org/XML/1998/namespacelang" +XMLBASE = "http://www.w3.org/XML/1998/namespacebase" +OWL_NS = Namespace('http://www.w3.org/2002/07/owl#') + +# TODO: +def fix(val): + "strip off _: from nodeIDs... 
as they are not valid NCNames" + if val.startswith("_:"): + return val[2:] + else: + return val + + +class PrettyXMLSerializer(Serializer): + + def __init__(self, store, max_depth=3): + super(PrettyXMLSerializer, self).__init__(store) + self.forceRDFAbout=set() + + def serialize(self, stream, base=None, encoding=None, **args): + self.__serialized = {} + store = self.store + self.base = base + self.max_depth = args.get("max_depth", 3) + assert self.max_depth>0, "max_depth must be greater than 0" + + self.nm = nm = store.namespace_manager + self.writer = writer = XMLWriter(stream, nm, encoding) + + namespaces = {} + possible = set(store.predicates()).union(store.objects(None, RDF.type)) + for predicate in possible: + prefix, namespace, local = nm.compute_qname(predicate) + namespaces[prefix] = namespace + namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + writer.push(RDF.RDF) + if "xml_base" in args: + writer.attribute(XMLBASE, args["xml_base"]) + writer.namespaces(namespaces.iteritems()) + + # Write out subjects that can not be inline + for subject in store.subjects(): + if (None, None, subject) in store: + if (subject, None, subject) in store: + self.subject(subject, 1) + else: + self.subject(subject, 1) + + # write out anything that has not yet been reached + # write out BNodes last (to ensure they can be inlined where possible) + bnodes=set() + for subject in store.subjects(): + if isinstance(subject,BNode): + bnodes.add(subject) + continue + self.subject(subject, 1) + #now serialize only those BNodes that have not been serialized yet + for bnode in bnodes: + if bnode not in self.__serialized: + self.subject(subject, 1) + writer.pop(RDF.RDF) + stream.write(b("\n")) + + # Set to None so that the memory can get garbage collected. 
        self.__serialized = None


    def subject(self, subject, depth=1):
        # Serialize one subject as a (possibly typed) node element,
        # unless it was already written or is a forced rdf:about stub.
        store = self.store
        writer = self.writer
        if subject in self.forceRDFAbout:
            # Collection members get a bare reference stub (see predicate()).
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)
        elif not subject in self.__serialized:
            self.__serialized[subject] = 1
            # Use the rdf:type as the element name when it has a QName,
            # falling back to rdf:Description.
            type = first(store.objects(subject, RDF.type))
            try:
                self.nm.qname(type)
            except:
                type = None
            element = type or RDF.Description
            writer.push(element)
            if isinstance(subject, BNode):
                def subj_as_obj_more_than(ceil):
                    return True
                    # more_than(store.triples((None, None, subject)), ceil)

                # here we only include BNode labels if they are referenced
                # more than once (this reduces the use of redundant BNode
                # identifiers)
                if subj_as_obj_more_than(1):
                    writer.attribute(RDF.nodeID, fix(subject))
            else:
                writer.attribute(RDF.about, self.relativize(subject))
            if (subject, None, None) in store:
                for predicate, object in store.predicate_objects(subject):
                    # the type triple is already expressed by the element name
                    if not (predicate==RDF.type and object==type):
                        self.predicate(predicate, object, depth+1)
            writer.pop(element)
        elif subject in self.forceRDFAbout:
            # NOTE(review): unreachable — the same condition is handled by
            # the first branch above; kept as in the original.
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)

    def predicate(self, predicate, object, depth=1):
        # Serialize one property element, inlining the object node where
        # the pretty rules allow it.
        writer = self.writer
        store = self.store
        writer.push(predicate)
        if isinstance(object, Literal):
            # NOTE(review): 'attributes' is assigned but never used here;
            # vestigial code kept as in the original.
            attributes = ""
            if object.language:
                writer.attribute(XMLLANG, object.language)
            if object.datatype:
                writer.attribute(RDF.datatype, object.datatype)
            writer.text(object)
        elif object in self.__serialized or not (object, None, None) in store:
            # already written (or nothing known about it): reference only
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))
        else:
            if first(store.objects(object, RDF.first)): # may not have type RDF.List
                collection = object
                self.__serialized[object] = 1
                # TODO: warn that any assertions on object other than
                # RDF.first and RDF.rest are ignored... including RDF.List
                writer.attribute(RDF.parseType, "Collection")
                col=Collection(store,object)
                for item in col:
                    if isinstance(item,URIRef):
                        self.forceRDFAbout.add(item)
                    self.subject(item)
                    if not isinstance(item,URIRef):
                        self.__serialized[item] = 1
            else:
                if first(store.triples_choices((object,
                                                RDF.type,
                                                [OWL_NS.Class,RDFS.Class]))) and\
                   isinstance(object, URIRef):
                    # known classes are referenced, never inlined
                    writer.attribute(RDF.resource, self.relativize(object))
                elif depth<=self.max_depth:
                    self.subject(object, depth+1)
                elif isinstance(object, BNode):
                    if not object in self.__serialized and \
                       (object, None, None) in store and \
                       len(list(store.subjects(object=object)))==1:
                        # inline blank nodes if they haven't been serialized
                        # yet and are only referenced once (regardless of
                        # depth)
                        self.subject(object, depth+1)
                    else:
                        writer.attribute(RDF.nodeID, fix(object))
                else:
                    writer.attribute(RDF.resource, self.relativize(object))
        writer.pop(predicate)


# NOTE(review): everything below is the start of "rdfxml.py~", an editor
# backup duplicating rdfxml.py with "rdflib" imports; consider removing it.
from __future__ import generators

from rdflib.plugins.serializers.xmlwriter import XMLWriter

from rdflib.namespace import Namespace, RDF, RDFS, split_uri

from rdflib.term import URIRef, Literal, BNode
from rdflib.util import first, more_than
from rdflib.collection import Collection
from rdflib.serializer import Serializer

from rdflib.exceptions import Error

from rdflib.py3compat import b

from xml.sax.saxutils import quoteattr, escape

__all__ = ['fix', 'XMLSerializer',
           'PrettyXMLSerializer']

class XMLSerializer(Serializer):
    # Flat (non-pretty) RDF/XML serializer — backup-file copy of the one
    # in rdfxml.py; see the note above.

    def __init__(self, store):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self):
        # Yield (prefix, namespace) pairs for every predicate in the store,
        # forcing the conventional "rdf" prefix to the RDF namespace.
        store = self.store
        nm = store.namespace_manager
        bindings = {}
        for predicate in set(store.predicates()):
            prefix, namespace, name = nm.compute_qname(predicate)
            bindings[prefix] = URIRef(namespace)
        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        if "rdf" in bindings:
            assert bindings["rdf"]==RDFNS
        else:
            bindings["rdf"] = RDFNS
        for prefix, namespace in bindings.iteritems():
            yield prefix, namespace


    def serialize(self, stream, base=None, encoding=None, **args):
        # Write the whole store to `stream` as flat RDF/XML.
        self.base = base
        self.__stream = stream
        self.__serialized = {}
        encoding = self.encoding
        self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace'))

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

        # startRDF
        write('<rdf:RDF\n')
        # If provided, write xml:base attribute for the RDF
        if "xml_base" in args:
            write(' xml:base="%s"\n' % args['xml_base'])
        # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
        bindings = list(self.__bindings())
        bindings.sort()
        for prefix, namespace in bindings:
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write(' xmlns="%s"\n' % namespace)
        write('>\n')

        # write out triples by subject
        for subject in self.store.subjects():
            self.subject(subject, 1)

        # endRDF
        write( "</rdf:RDF>\n" )

        # Set to None so that the memory can get garbage collected.
        #self.__serialized = None
        del self.__serialized


    def subject(self, subject, depth=1):
        # Emit one rdf:Description element per subject, exactly once.
        if not subject in self.__serialized:
            self.__serialized[subject] = 1
            if isinstance(subject, (BNode,URIRef)):
                write = self.write
                indent = " " * depth
                element_name = "rdf:Description"
                if isinstance(subject, BNode):
                    write( '%s<%s rdf:nodeID="%s"' %
                           (indent, element_name, subject))
                else:
                    uri = quoteattr(self.relativize(subject))
                    write( "%s<%s rdf:about=%s" % (indent, element_name, uri))
                if (subject, None, None) in self.store:
                    write( ">\n" )
                    for predicate, object in self.store.predicate_objects(subject):
                        self.predicate(predicate, object, depth+1)
                    write( "%s</%s>\n" % (indent, element_name))
                else:
                    # no statements about this subject: self-closing element
                    write( "/>\n" )

    def predicate(self, predicate, object, depth=1):
        # Emit one property element for the (predicate, object) pair.
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname(predicate)
        if isinstance(object, Literal):
            attributes = ""
            if object.language:
                attributes += ' xml:lang="%s"'%object.language

            if object.datatype:
                attributes += ' rdf:datatype="%s"'%object.datatype

            write("%s<%s%s>%s</%s>\n" %
                  (indent, qname, attributes,
                   escape(object), qname) )
        else:
            if isinstance(object, BNode):
                write('%s<%s rdf:nodeID="%s"/>\n' %
                      (indent, qname, object))
            else:
                write("%s<%s rdf:resource=%s/>\n" %
                      (indent, qname, quoteattr(self.relativize(object))))



# NOTE: rdflib builds these attribute URIs by plain concatenation of the
# XML namespace and the local name, hence no separator before lang/base.
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')

# TODO:
def fix(val):
    "strip off _: from nodeIDs... as they are not valid NCNames"
as they are not valid NCNames" + if val.startswith("_:"): + return val[2:] + else: + return val + + +class PrettyXMLSerializer(Serializer): + + def __init__(self, store, max_depth=3): + super(PrettyXMLSerializer, self).__init__(store) + self.forceRDFAbout=set() + + def serialize(self, stream, base=None, encoding=None, **args): + self.__serialized = {} + store = self.store + self.base = base + self.max_depth = args.get("max_depth", 3) + assert self.max_depth>0, "max_depth must be greater than 0" + + self.nm = nm = store.namespace_manager + self.writer = writer = XMLWriter(stream, nm, encoding) + + namespaces = {} + possible = set(store.predicates()).union(store.objects(None, RDF.type)) + for predicate in possible: + prefix, namespace, local = nm.compute_qname(predicate) + namespaces[prefix] = namespace + namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + writer.push(RDF.RDF) + if "xml_base" in args: + writer.attribute(XMLBASE, args["xml_base"]) + writer.namespaces(namespaces.iteritems()) + + # Write out subjects that can not be inline + for subject in store.subjects(): + if (None, None, subject) in store: + if (subject, None, subject) in store: + self.subject(subject, 1) + else: + self.subject(subject, 1) + + # write out anything that has not yet been reached + # write out BNodes last (to ensure they can be inlined where possible) + bnodes=set() + for subject in store.subjects(): + if isinstance(subject,BNode): + bnodes.add(subject) + continue + self.subject(subject, 1) + #now serialize only those BNodes that have not been serialized yet + for bnode in bnodes: + if bnode not in self.__serialized: + self.subject(subject, 1) + writer.pop(RDF.RDF) + stream.write(b("\n")) + + # Set to None so that the memory can get garbage collected. 
        self.__serialized = None



    def subject(self, subject, depth=1):
        # Serialize one subject as a (possibly typed) node element,
        # unless it was already written or is a forced rdf:about stub.
        store = self.store
        writer = self.writer
        if subject in self.forceRDFAbout:
            # Collection members get a bare reference stub (see predicate()).
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)
        elif not subject in self.__serialized:
            self.__serialized[subject] = 1
            # Use the rdf:type as the element name when it has a QName,
            # falling back to rdf:Description.
            type = first(store.objects(subject, RDF.type))
            try:
                self.nm.qname(type)
            except:
                type = None
            element = type or RDF.Description
            writer.push(element)
            if isinstance(subject, BNode):
                def subj_as_obj_more_than(ceil):
                    return True
                    # more_than(store.triples((None, None, subject)), ceil)

                # here we only include BNode labels if they are referenced
                # more than once (this reduces the use of redundant BNode
                # identifiers)
                if subj_as_obj_more_than(1):
                    writer.attribute(RDF.nodeID, fix(subject))
            else:
                writer.attribute(RDF.about, self.relativize(subject))
            if (subject, None, None) in store:
                for predicate, object in store.predicate_objects(subject):
                    # the type triple is already expressed by the element name
                    if not (predicate==RDF.type and object==type):
                        self.predicate(predicate, object, depth+1)
            writer.pop(element)
        elif subject in self.forceRDFAbout:
            # NOTE(review): unreachable — the same condition is handled by
            # the first branch above; kept as in the original.
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)

    def predicate(self, predicate, object, depth=1):
        # Serialize one property element, inlining the object node where
        # the pretty rules allow it.
        writer = self.writer
        store = self.store
        writer.push(predicate)
        if isinstance(object, Literal):
            # NOTE(review): 'attributes' is assigned but never used here;
            # vestigial code kept as in the original.
            attributes = ""
            if object.language:
                writer.attribute(XMLLANG, object.language)
            if object.datatype:
                writer.attribute(RDF.datatype, object.datatype)
            writer.text(object)
        elif object in self.__serialized or not (object, None, None) in store:
            # already written (or nothing known about it): reference only
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))
        else:
            if first(store.objects(object, RDF.first)): # may not have type RDF.List
                collection = object
                self.__serialized[object] = 1
                # TODO: warn that any assertions on object other than
                # RDF.first and RDF.rest are ignored... including RDF.List
                writer.attribute(RDF.parseType, "Collection")
                col=Collection(store,object)
                for item in col:
                    if isinstance(item,URIRef):
                        self.forceRDFAbout.add(item)
                    self.subject(item)
                    if not isinstance(item,URIRef):
                        self.__serialized[item] = 1
            else:
                if first(store.triples_choices((object,
                                                RDF.type,
                                                [OWL_NS.Class,RDFS.Class]))) and\
                   isinstance(object, URIRef):
                    # known classes are referenced, never inlined
                    writer.attribute(RDF.resource, self.relativize(object))
                elif depth<=self.max_depth:
                    self.subject(object, depth+1)
                elif isinstance(object, BNode):
                    if not object in self.__serialized and \
                       (object, None, None) in store and \
                       len(list(store.subjects(object=object)))==1:
                        # inline blank nodes if they haven't been serialized
                        # yet and are only referenced once (regardless of
                        # depth)
                        self.subject(object, depth+1)
                    else:
                        writer.attribute(RDF.nodeID, fix(object))
                else:
                    writer.attribute(RDF.resource, self.relativize(object))
        writer.pop(predicate)


# --- trix.py ---------------------------------------------------------------
from rdflib_.serializer import Serializer
from rdflib_.plugins.serializers.xmlwriter import XMLWriter

from rdflib_.term import URIRef, Literal, BNode
from rdflib_.namespace import Namespace

from rdflib_.graph import Graph, ConjunctiveGraph

from rdflib_.py3compat import b

__all__ = ['TriXSerializer']

## TODO: MOve this somewhere central
TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")

class TriXSerializer(Serializer):
    # Serializes a (conjunctive) graph as TriX; see
    # http://www.w3.org/2004/03/trix/
    def __init__(self, store):
__init__(self, store): + super(TriXSerializer, self).__init__(store) + + def serialize(self, stream, base=None, encoding=None, **args): + + nm=self.store.namespace_manager + + self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS}) + + self.writer.push(TRIXNS[u"TriX"]) + self.writer.namespaces() + + if isinstance(self.store, ConjunctiveGraph): + for subgraph in self.store.contexts(): + self._writeGraph(subgraph) + elif isinstance(self.store, Graph): + self._writeGraph(self.store) + else: + raise Exception("Unknown graph type: "+type(self.store)) + + self.writer.pop() + stream.write(b("\n")) + + + def _writeGraph(self, graph): + self.writer.push(TRIXNS[u"graph"]) + if isinstance(graph.identifier, URIRef): + self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier)) + + for triple in graph.triples((None,None,None)): + self._writeTriple(triple) + self.writer.pop() + + def _writeTriple(self, triple): + self.writer.push(TRIXNS[u"triple"]) + for component in triple: + if isinstance(component, URIRef): + self.writer.element(TRIXNS[u"uri"], + content=unicode(component)) + elif isinstance(component, BNode): + self.writer.element(TRIXNS[u"id"], + content=unicode(component)) + elif isinstance(component, Literal): + if component.datatype: + self.writer.element(TRIXNS[u"typedLiteral"], + content=unicode(component), + attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) }) + elif component.language: + self.writer.element(TRIXNS[u"plainLiteral"], + content=unicode(component), + attributes={ XMLNS[u"lang"]: unicode(component.language) }) + else: + self.writer.element(TRIXNS[u"plainLiteral"], + content=unicode(component)) + self.writer.pop() diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~ new file mode 100644 index 0000000..c7115c0 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/trix.py~ @@ -0,0 +1,72 @@ +from rdflib.serializer 
class TriXSerializer(Serializer):
    """Serializer producing the TriX XML format.

    A ConjunctiveGraph is written as one <graph> element per context;
    a plain Graph as a single <graph>.
    """

    def __init__(self, store):
        super(TriXSerializer, self).__init__(store)

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write ``self.store`` to *stream* as TriX.

        *base* and extra keyword arguments are accepted for serializer-API
        compatibility but unused.
        """
        nm = self.store.namespace_manager

        # TriX elements live in the default namespace of the document.
        self.writer = XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})

        self.writer.push(TRIXNS[u"TriX"])
        self.writer.namespaces()

        if isinstance(self.store, ConjunctiveGraph):
            for subgraph in self.store.contexts():
                self._writeGraph(subgraph)
        elif isinstance(self.store, Graph):
            self._writeGraph(self.store)
        else:
            # BUG FIX: was ``"Unknown graph type: " + type(self.store)`` --
            # str + type raises TypeError, masking the intended message.
            raise Exception("Unknown graph type: %r" % type(self.store))

        self.writer.pop()
        stream.write(b("\n"))

    def _writeGraph(self, graph):
        # One <graph> element; URI-identified graphs get a <uri> child.
        self.writer.push(TRIXNS[u"graph"])
        if isinstance(graph.identifier, URIRef):
            self.writer.element(TRIXNS[u"uri"],
                                content=unicode(graph.identifier))

        for triple in graph.triples((None, None, None)):
            self._writeTriple(triple)
        self.writer.pop()

    def _writeTriple(self, triple):
        # <triple> with one child element per component, chosen by term type.
        self.writer.push(TRIXNS[u"triple"])
        for component in triple:
            if isinstance(component, URIRef):
                self.writer.element(TRIXNS[u"uri"],
                                    content=unicode(component))
            elif isinstance(component, BNode):
                self.writer.element(TRIXNS[u"id"],
                                    content=unicode(component))
            elif isinstance(component, Literal):
                if component.datatype:
                    self.writer.element(
                        TRIXNS[u"typedLiteral"],
                        content=unicode(component),
                        attributes={TRIXNS[u"datatype"]:
                                    unicode(component.datatype)})
                elif component.language:
                    self.writer.element(
                        TRIXNS[u"plainLiteral"],
                        content=unicode(component),
                        attributes={XMLNS[u"lang"]:
                                    unicode(component.language)})
                else:
                    self.writer.element(TRIXNS[u"plainLiteral"],
                                        content=unicode(component))
        self.writer.pop()
in self._subjects + if subject not in seen] + + recursable.sort() + subjects.extend([subject for (isbnode, refs, subject) in recursable]) + + return subjects + + def preprocess(self): + for triple in self.store.triples((None,None,None)): + self.preprocessTriple(triple) + + def preprocessTriple(self, (s,p,o)): + references = self.refCount(o) + 1 + self._references[o] = references + self._subjects[s] = True + + def refCount(self, node): + """Return the number of times this node has been referenced in the object position""" + return self._references.get(node, 0) + + def reset(self): + self.depth = 0 + self.lists = {} + self.namespaces = {} + self._references = {} + self._serialized = {} + self._subjects = {} + self._topLevels = {} + + def buildPredicateHash(self, subject): + """Build a hash key by predicate to a list of objects for the given subject""" + properties = {} + for s,p,o in self.store.triples((subject, None, None)): + oList = properties.get(p, []) + oList.append(o) + properties[p] = oList + return properties + + def sortProperties(self, properties): + """Take a hash from predicate uris to lists of values. + Sort the lists of values. 
Return a sorted list of properties.""" + # Sort object lists + for prop, objects in properties.items(): + objects.sort() + + # Make sorted list of properties + propList = [] + seen = {} + for prop in self.predicateOrder: + if (prop in properties) and (prop not in seen): + propList.append(prop) + seen[prop] = True + props = properties.keys() + props.sort() + for prop in props: + if prop not in seen: + propList.append(prop) + seen[prop] = True + return propList + + def subjectDone(self, subject): + """Mark a subject as done.""" + self._serialized[subject] = True + + def indent(self, modifier=0): + """Returns indent string multiplied by the depth""" + return (self.depth+modifier)*self.indentString + + def write(self, text): + """Write text in given encoding.""" + self.stream.write(text.encode(self.encoding, 'replace')) + + +SUBJECT = 0 +VERB = 1 +OBJECT = 2 + +_GEN_QNAME_FOR_DT = False +_SPACIOUS_OUTPUT = False + + +class TurtleSerializer(RecursiveSerializer): + + short_name = "turtle" + indentString = ' ' + + def __init__(self, store): + super(TurtleSerializer, self).__init__(store) + self.keywords = { + RDF.type: 'a' + } + self.reset() + self.stream = None + self._spacious = _SPACIOUS_OUTPUT + + def reset(self): + super(TurtleSerializer, self).reset() + self._shortNames = {} + self._started = False + + def serialize(self, stream, base=None, encoding=None, spacious=None, **args): + self.reset() + self.stream = stream + self.base = base + + if spacious is not None: + self._spacious = spacious + # In newer rdflibs these are always in the namespace manager + #self.store.prefix_mapping('rdf', RDFNS) + #self.store.prefix_mapping('rdfs', RDFSNS) + + self.preprocess() + subjects_list = self.orderSubjects() + + self.startDocument() + + firstTime = True + for subject in subjects_list: + if self.isDone(subject): + continue + if firstTime: + firstTime = False + if self.statement(subject) and not firstTime: + self.write('\n') + + self.endDocument() + 
stream.write(u"\n".encode('ascii')) + + def preprocessTriple(self, triple): + super(TurtleSerializer, self).preprocessTriple(triple) + for i, node in enumerate(triple): + if node in self.keywords: + continue + # Don't use generated prefixes for subjects and objects + self.getQName(node, gen_prefix=(i==VERB)) + if isinstance(node, Literal) and node.datatype: + self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) + p = triple[1] + if isinstance(p, BNode): + self._references[p] = self.refCount(p) + 1 + + def getQName(self, uri, gen_prefix=True): + if not isinstance(uri, URIRef): + return None + + parts=None + + try: + parts = self.store.compute_qname(uri, generate=gen_prefix) + except: + + # is the uri a namespace in itself? + pfx = self.store.store.prefix(uri) + + if pfx is not None: + parts = (pfx, uri, '') + else: + # nothing worked + return None + + prefix, namespace, local = parts + # Local parts with '.' will mess up serialization + if '.' in local: + return None + self.addNamespace(prefix, namespace) + return u'%s:%s' % (prefix, local) + + def startDocument(self): + self._started = True + ns_list = sorted(self.namespaces.items()) + for prefix, uri in ns_list: + self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri)) + if ns_list and self._spacious: + self.write('\n') + + def endDocument(self): + if self._spacious: + self.write('\n') + + def statement(self, subject): + self.subjectDone(subject) + return self.s_squared(subject) or self.s_default(subject) + + def s_default(self, subject): + self.write('\n'+self.indent()) + self.path(subject, SUBJECT) + self.predicateList(subject) + self.write(' .') + return True + + def s_squared(self, subject): + if (self.refCount(subject) > 0) or not isinstance(subject, BNode): + return False + self.write('\n'+self.indent()+'[]') + #self.depth+=1 + self.predicateList(subject) + #self.depth-=1 + self.write(' .') + return True + + def path(self, node, position, newline=False): + if not (self.p_squared(node, 
position, newline) + or self.p_default(node, position, newline)): + raise Error("Cannot serialize node '%s'"%(node, )) + + def p_default(self, node, position, newline=False): + if position != SUBJECT and not newline: + self.write(' ') + self.write(self.label(node, position)) + return True + + def label(self, node, position): + if node == RDF.nil: + return '()' + if position is VERB and node in self.keywords: + return self.keywords[node] + if isinstance(node, Literal): + return node._literal_n3(use_plain=True, + qname_callback=lambda dt: + self.getQName(dt, _GEN_QNAME_FOR_DT)) + else: + return self.getQName(node, position==VERB) or node.n3() + + def p_squared(self, node, position, newline=False): + if (not isinstance(node, BNode) + or node in self._serialized + or self.refCount(node) > 1 + or position == SUBJECT): + return False + + if not newline: + self.write(' ') + + if self.isValidList(node): + # this is a list + self.write('(') + self.depth += 1#2 + self.doList(node) + self.depth -= 1#2 + self.write(' )') + else: + self.subjectDone(node) + self.depth += 2 + #self.write('[\n' + self.indent()) + self.write('[') + self.depth -= 1 + #self.predicateList(node, newline=True) + self.predicateList(node, newline=False) + #self.write('\n' + self.indent() + ']') + self.write(' ]') + self.depth -= 1 + + return True + + def isValidList(self, l): + """ + Checks if l is a valid RDF list, i.e. no nodes have other properties. 
+ """ + try: + if not self.store.value(l, RDF.first): + return False + except: + return False + while l: + if l != RDF.nil and len( + list(self.store.predicate_objects(l))) != 2: + return False + l = self.store.value(l, RDF.rest) + return True + + def doList(self,l): + while l: + item = self.store.value(l, RDF.first) + if item: + self.path(item, OBJECT) + self.subjectDone(l) + l = self.store.value(l, RDF.rest) + + def predicateList(self, subject, newline=False): + properties = self.buildPredicateHash(subject) + propList = self.sortProperties(properties) + if len(propList) == 0: + return + self.verb(propList[0], newline=newline) + self.objectList(properties[propList[0]]) + for predicate in propList[1:]: + self.write(';\n' + self.indent(1)) + self.verb(predicate, newline=True) + self.objectList(properties[predicate]) + + def verb(self, node, newline=False): + self.path(node, VERB, newline) + + def objectList(self, objects): + count = len(objects) + if count == 0: + return + depthmod = (count == 1) and 0 or 1 + self.depth += depthmod + self.path(objects[0], OBJECT) + for obj in objects[1:]: + self.write(',\n' + self.indent(1)) + self.path(obj, OBJECT, newline=True) + self.depth -= depthmod + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~ b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~ new file mode 100644 index 0000000..6878013 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/turtle.py~ @@ -0,0 +1,364 @@ +""" +Turtle RDF graph serializer for RDFLib. +See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification. 
class RecursiveSerializer(Serializer):
    """Base class for serializers that emit each subject recursively.

    Tracks which subjects are already serialized, how often each node is
    referenced in object position, and the set of namespaces in use.
    """

    topClasses = [RDFS.Class]
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    indentString = u"  "

    def __init__(self, store):
        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Check to see if the subject should be serialized yet"""
        if ((self.isDone(subject))
            or (subject not in self._subjects)
            or ((subject in self._topLevels) and (self.depth > 1))
            or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth))
            ):
            return False
        return True

    def isDone(self, subject):
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self):
        """Return subjects: topClasses instances first, then the rest
        sorted (URIRefs before BNodes) for deterministic output."""
        seen = {}
        subjects = []

        for classURI in self.topClasses:
            members = sorted(self.store.subjects(RDF.type, classURI))
            for member in members:
                subjects.append(member)
                self._topLevels[member] = True
                seen[member] = True

        recursable = [(isinstance(subject, BNode),
                       self.refCount(subject), subject)
                      for subject in self._subjects
                      if subject not in seen]

        recursable.sort()
        subjects.extend([subject for (isbnode, refs, subject) in recursable])

        return subjects

    def preprocess(self):
        """Feed every triple in the store through preprocessTriple()."""
        for triple in self.store.triples((None, None, None)):
            self.preprocessTriple(triple)

    def preprocessTriple(self, triple):
        """Count one object reference and record the subject of *triple*.

        Takes the triple as one argument instead of the Python-2-only
        ``(s, p, o)`` tuple parameter (a syntax error under Python 3);
        callers already pass a single triple.
        """
        s, p, o = triple
        self._references[o] = self.refCount(o) + 1
        self._subjects[s] = True

    def refCount(self, node):
        """Return the number of times this node has been referenced in the object position"""
        return self._references.get(node, 0)

    def reset(self):
        """Clear all per-serialization state."""
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = {}
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

    def buildPredicateHash(self, subject):
        """Build a hash key by predicate to a list of objects for the given subject"""
        properties = {}
        for s, p, o in self.store.triples((subject, None, None)):
            oList = properties.get(p, [])
            oList.append(o)
            properties[p] = oList
        return properties

    def sortProperties(self, properties):
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values.  Return a sorted list of properties."""
        # Sort object lists
        for prop, objects in properties.items():
            objects.sort()

        # Make sorted list of properties: predicateOrder entries first,
        # then the remainder.  sorted() instead of the Python-2-only
        # ``keys = d.keys(); keys.sort()`` (dict views have no .sort()).
        propList = []
        seen = {}
        for prop in self.predicateOrder:
            if (prop in properties) and (prop not in seen):
                propList.append(prop)
                seen[prop] = True
        for prop in sorted(properties.keys()):
            if prop not in seen:
                propList.append(prop)
                seen[prop] = True
        return propList

    def subjectDone(self, subject):
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Returns indent string multiplied by the depth"""
        return (self.depth + modifier) * self.indentString

    def write(self, text):
        """Write text in given encoding."""
        self.stream.write(text.encode(self.encoding, 'replace'))
self.base = base + + if spacious is not None: + self._spacious = spacious + # In newer rdflibs these are always in the namespace manager + #self.store.prefix_mapping('rdf', RDFNS) + #self.store.prefix_mapping('rdfs', RDFSNS) + + self.preprocess() + subjects_list = self.orderSubjects() + + self.startDocument() + + firstTime = True + for subject in subjects_list: + if self.isDone(subject): + continue + if firstTime: + firstTime = False + if self.statement(subject) and not firstTime: + self.write('\n') + + self.endDocument() + stream.write(u"\n".encode('ascii')) + + def preprocessTriple(self, triple): + super(TurtleSerializer, self).preprocessTriple(triple) + for i, node in enumerate(triple): + if node in self.keywords: + continue + # Don't use generated prefixes for subjects and objects + self.getQName(node, gen_prefix=(i==VERB)) + if isinstance(node, Literal) and node.datatype: + self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) + p = triple[1] + if isinstance(p, BNode): + self._references[p] = self.refCount(p) + 1 + + def getQName(self, uri, gen_prefix=True): + if not isinstance(uri, URIRef): + return None + + parts=None + + try: + parts = self.store.compute_qname(uri, generate=gen_prefix) + except: + + # is the uri a namespace in itself? + pfx = self.store.store.prefix(uri) + + if pfx is not None: + parts = (pfx, uri, '') + else: + # nothing worked + return None + + prefix, namespace, local = parts + # Local parts with '.' will mess up serialization + if '.' 
in local: + return None + self.addNamespace(prefix, namespace) + return u'%s:%s' % (prefix, local) + + def startDocument(self): + self._started = True + ns_list = sorted(self.namespaces.items()) + for prefix, uri in ns_list: + self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri)) + if ns_list and self._spacious: + self.write('\n') + + def endDocument(self): + if self._spacious: + self.write('\n') + + def statement(self, subject): + self.subjectDone(subject) + return self.s_squared(subject) or self.s_default(subject) + + def s_default(self, subject): + self.write('\n'+self.indent()) + self.path(subject, SUBJECT) + self.predicateList(subject) + self.write(' .') + return True + + def s_squared(self, subject): + if (self.refCount(subject) > 0) or not isinstance(subject, BNode): + return False + self.write('\n'+self.indent()+'[]') + #self.depth+=1 + self.predicateList(subject) + #self.depth-=1 + self.write(' .') + return True + + def path(self, node, position, newline=False): + if not (self.p_squared(node, position, newline) + or self.p_default(node, position, newline)): + raise Error("Cannot serialize node '%s'"%(node, )) + + def p_default(self, node, position, newline=False): + if position != SUBJECT and not newline: + self.write(' ') + self.write(self.label(node, position)) + return True + + def label(self, node, position): + if node == RDF.nil: + return '()' + if position is VERB and node in self.keywords: + return self.keywords[node] + if isinstance(node, Literal): + return node._literal_n3(use_plain=True, + qname_callback=lambda dt: + self.getQName(dt, _GEN_QNAME_FOR_DT)) + else: + return self.getQName(node, position==VERB) or node.n3() + + def p_squared(self, node, position, newline=False): + if (not isinstance(node, BNode) + or node in self._serialized + or self.refCount(node) > 1 + or position == SUBJECT): + return False + + if not newline: + self.write(' ') + + if self.isValidList(node): + # this is a list + self.write('(') + self.depth += 1#2 + 
self.doList(node) + self.depth -= 1#2 + self.write(' )') + else: + self.subjectDone(node) + self.depth += 2 + #self.write('[\n' + self.indent()) + self.write('[') + self.depth -= 1 + #self.predicateList(node, newline=True) + self.predicateList(node, newline=False) + #self.write('\n' + self.indent() + ']') + self.write(' ]') + self.depth -= 1 + + return True + + def isValidList(self, l): + """ + Checks if l is a valid RDF list, i.e. no nodes have other properties. + """ + try: + if not self.store.value(l, RDF.first): + return False + except: + return False + while l: + if l != RDF.nil and len( + list(self.store.predicate_objects(l))) != 2: + return False + l = self.store.value(l, RDF.rest) + return True + + def doList(self,l): + while l: + item = self.store.value(l, RDF.first) + if item: + self.path(item, OBJECT) + self.subjectDone(l) + l = self.store.value(l, RDF.rest) + + def predicateList(self, subject, newline=False): + properties = self.buildPredicateHash(subject) + propList = self.sortProperties(properties) + if len(propList) == 0: + return + self.verb(propList[0], newline=newline) + self.objectList(properties[propList[0]]) + for predicate in propList[1:]: + self.write(';\n' + self.indent(1)) + self.verb(predicate, newline=True) + self.objectList(properties[predicate]) + + def verb(self, node, newline=False): + self.path(node, VERB, newline) + + def objectList(self, objects): + count = len(objects) + if count == 0: + return + depthmod = (count == 1) and 0 or 1 + self.depth += depthmod + self.path(objects[0], OBJECT) + for obj in objects[1:]: + self.write(',\n' + self.indent(1)) + self.path(obj, OBJECT, newline=True) + self.depth -= depthmod + + diff --git a/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py new file mode 100644 index 0000000..d36af4b --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/serializers/xmlwriter.py @@ -0,0 +1,103 @@ +import codecs 
class XMLWriter(object):
    """Minimal streaming XML writer used by the XML-based serializers.

    Keeps a stack of open elements; the ">" of a start tag is deferred so
    attributes can still be added, and an element with no content is
    collapsed to "<tag/>".  Qnames are resolved first against *extra_ns*
    and then against the namespace manager.
    """

    def __init__(self, stream, namespace_manager, encoding=None, decl=1,
                 extra_ns=None):
        encoding = encoding or 'utf-8'
        encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding)
        # Wrap the raw byte stream so we can write text in *encoding*.
        self.stream = stream = stream_writer(stream)
        if decl:
            stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)
        self.element_stack = []
        self.nm = namespace_manager
        # None sentinel instead of a shared mutable ``{}`` default argument.
        self.extra_ns = extra_ns or {}
        self.closed = True  # no start tag awaiting its ">"

    def __get_indent(self):
        return "  " * len(self.element_stack)
    indent = property(__get_indent)

    def __close_start_tag(self):
        # Emit the deferred ">" of the most recent start tag, if any.
        if not self.closed:
            self.closed = True
            self.stream.write(">")

    def push(self, uri):
        """Open a new element named by *uri*."""
        self.__close_start_tag()
        write = self.stream.write
        write("\n")
        write(self.indent)
        write("<%s" % self.qname(uri))
        self.element_stack.append(uri)
        self.closed = False
        self.parent = False

    def pop(self, uri=None):
        """Close the innermost element (asserting it is *uri* when given)."""
        top = self.element_stack.pop()
        if uri:
            assert uri == top
        write = self.stream.write
        if not self.closed:
            # Nothing was written inside: collapse to an empty-element tag.
            self.closed = True
            write("/>")
        else:
            if self.parent:
                write("\n")
                write(self.indent)
            write("</%s>" % self.qname(top))
        self.parent = True

    def element(self, uri, content, attributes=None):
        """Utility method for adding a complete simple element"""
        self.push(uri)
        # ``items()`` instead of the Python-2-only ``iteritems()``; None
        # sentinel instead of a mutable ``{}`` default argument.
        for k, v in (attributes or {}).items():
            self.attribute(k, v)
        self.text(content)
        self.pop()

    def namespaces(self, namespaces=None):
        """Write xmlns declarations: *namespaces* (default: the manager's)
        followed by the extra_ns mapping."""
        if not namespaces:
            namespaces = self.nm.namespaces()

        write = self.stream.write
        write("\n")
        for prefix, namespace in namespaces:
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write(' xmlns="%s"\n' % namespace)

        for prefix, namespace in self.extra_ns.items():
            if prefix:
                write(' xmlns:%s="%s"\n' % (prefix, namespace))
            else:
                write(' xmlns="%s"\n' % namespace)

    def attribute(self, uri, value):
        """Write a single quoted attribute on the still-open start tag."""
        write = self.stream.write
        write(" %s=%s" % (self.qname(uri), quoteattr(value)))

    def text(self, text):
        """Write character data; CDATA-wrap text containing raw markup."""
        self.__close_start_tag()
        if "<" in text and ">" in text and not "]]>" in text:
            self.stream.write("<![CDATA[")
            self.stream.write(text)
            self.stream.write("]]>")
        else:
            self.stream.write(escape(text))

    def qname(self, uri):
        """Compute qname for a uri using our extra namespaces,
        or the given namespace manager"""
        for pre, ns in self.extra_ns.items():
            if uri.startswith(ns):
                local = uri[len(ns):]
                if pre != "":
                    # BUG FIX: was ``":".join(pre, local)`` -- str.join takes
                    # a single iterable, so the two-argument call raised
                    # TypeError for every non-empty prefix.
                    return "%s:%s" % (pre, local)
                else:
                    return local

        return self.nm.qname(uri)
+ self.closed = True + write("/>") + else: + if self.parent: + write("\n") + write(self.indent) + write("</%s>" % self.qname(top)) + self.parent = True + + def element(self, uri, content, attributes={}): + """Utility method for adding a complete simple element""" + self.push(uri) + for k, v in attributes.iteritems(): + self.attribute(k,v) + self.text(content) + self.pop() + + def namespaces(self, namespaces=None): + if not namespaces: + namespaces=self.nm.namespaces() + + write = self.stream.write + write("\n") + for prefix, namespace in namespaces: + if prefix: + write(' xmlns:%s="%s"\n' % (prefix, namespace)) + else: + write(' xmlns="%s"\n' % namespace) + + for prefix, namespace in self.extra_ns.items(): + if prefix: + write(' xmlns:%s="%s"\n' % (prefix, namespace)) + else: + write(' xmlns="%s"\n' % namespace) + + + def attribute(self, uri, value): + write = self.stream.write + write(" %s=%s" % (self.qname(uri), quoteattr(value))) + + def text(self, text): + self.__close_start_tag() + if "<" in text and ">" in text and not "]]>" in text: + self.stream.write("<![CDATA[") + self.stream.write(text) + self.stream.write("]]>") + else: + self.stream.write(escape(text)) + + def qname(self,uri): + """Compute qname for a uri using our extra namespaces, + or the given namespace manager""" + + for pre,ns in self.extra_ns.items(): + if uri.startswith(ns): + if pre!="": + return ":".join(pre,uri[len(ns):]) + else: + return uri[len(ns):] + + return self.nm.qname(uri) diff --git a/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py new file mode 100644 index 0000000..282d4de --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py @@ -0,0 +1,531 @@ +from rdflib_.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN +from rdflib_.term import URIRef +from rdflib_.py3compat import b +def bb(u): return u.encode('utf-8') + +try: + from bsddb import db + has_bsddb = True +except 
ImportError:
    try:
        from bsddb3 import db
        has_bsddb = True
    except ImportError:
        has_bsddb = False
from os import mkdir
from os.path import exists, abspath
from urllib import pathname2url
from threading import Thread

import logging
_logger = logging.getLogger(__name__)

__all__ = ['Sleepycat']

class Sleepycat(Store):
    """Context-aware rdflib store backed by Berkeley DB (bsddb/bsddb3).

    Triples are kept in three clustered B-tree indices (c-s-p-o, c-p-o-s,
    c-o-s-p) whose keys are '^'-joined string ids; auxiliary databases hold
    contexts, namespace/prefix mappings and the id<->key tables.  A daemon
    thread periodically flushes dirty databases to disk.
    NOTE(review): Python-2-only syntax throughout (tuple parameters,
    ``except E, e``, ``0660``, ``xrange``).
    """
    context_aware = True
    formula_aware = True
    transaction_aware = False
    db_env = None

    def __init__(self, configuration=None, identifier=None):
        # Fail early if neither bsddb nor bsddb3 could be imported above.
        if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.")
        self.__open = False
        self.__identifier = identifier
        super(Sleepycat, self).__init__(configuration)
        # Shortcuts to the node pickler inherited from Store.
        self._loads = self.node_pickler.loads
        self._dumps = self.node_pickler.dumps

    def __get_identifier(self):
        return self.__identifier
    identifier = property(__get_identifier)

    def _init_db_environment(self, homeDir, create=True):
        """Create/open the Berkeley DB environment under *homeDir*.

        Returns a DBEnv, or NO_STORE when the directory is missing and
        *create* is false.
        """
        envsetflags = db.DB_CDB_ALLDB
        envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD
        if not exists(homeDir):
            if create==True:
                mkdir(homeDir) # TODO: implement create method and refactor this to it
                self.create(homeDir)
            else:
                return NO_STORE
        db_env = db.DBEnv()
        db_env.set_cachesize(0, 1024*1024*50) # TODO
        #db_env.set_lg_max(1024*1024)
        db_env.set_flags(envsetflags, 1)
        db_env.open(homeDir, envflags | db.DB_CREATE)
        return db_env

    def is_open(self):
        return self.__open

    def open(self, path, create=True):
        """Open (optionally creating) the store at *path*.

        Opens the three triple indices, the auxiliary databases, builds the
        8-entry lookup table (one per subject/predicate/object bound-ness
        pattern) and starts the background sync thread.  Returns VALID_STORE
        or NO_STORE.
        """
        if not has_bsddb: return NO_STORE
        homeDir = path

        if self.__identifier is None:
            # Default the store identifier to a file: URL for the directory.
            self.__identifier = URIRef(pathname2url(abspath(homeDir)))

        db_env = self._init_db_environment(homeDir, create)
        if db_env == NO_STORE:
            return NO_STORE
        self.db_env = db_env
        self.__open = True

        dbname = None
        dbtype = db.DB_BTREE
        # auto-commit ensures that the open-call commits when transactions are enabled
        dbopenflags = db.DB_THREAD
        if self.transaction_aware == True:
            dbopenflags |= db.DB_AUTO_COMMIT

        dbmode = 0660
        dbsetflags = 0

        # create and open the DBs
        self.__indicies = [None,] * 3
        self.__indicies_info = [None,] * 3
        for i in xrange(0, 3):
            # Index file names come from the key layout, e.g. "c^s^p^o^".
            index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode()
            index = db.DB(db_env)
            index.set_flags(dbsetflags)
            index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
            self.__indicies[i] = index
            self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i))

        # For each of the 8 bound/unbound (s, p, o) patterns pick the index
        # that gives the longest contiguous bound prefix (ties broken by
        # preferring the earliest rotation).
        lookup = {}
        for i in xrange(0, 8):
            results = []
            for start in xrange(0, 3):
                score = 1
                len = 0  # NOTE(review): shadows builtin len() within this loop
                for j in xrange(start, start+3):
                    if i & (1<<(j%3)):
                        score = score << 1
                        len += 1
                    else:
                        break
                tie_break = 2-start
                results.append(((score, tie_break), start, len))

            results.sort()
            score, start, len = results[-1]

            def get_prefix_func(start, end):
                # Closure factory: yields the key-prefix components for a
                # lookup (context first, then the bound triple parts).
                def get_prefix(triple, context):
                    if context is None:
                        yield ""
                    else:
                        yield context
                    i = start
                    while i<end:
                        yield triple[i%3]
                        i += 1
                    yield ""
                return get_prefix

            lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string))


        self.__lookup_dict = lookup

        self.__contexts = db.DB(db_env)
        self.__contexts.set_flags(dbsetflags)
        self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        self.__namespace = db.DB(db_env)
        self.__namespace.set_flags(dbsetflags)
        self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        self.__prefix = db.DB(db_env)
        self.__prefix.set_flags(dbsetflags)
        self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)

        # key->id (hash) and id->key (recno) term interning tables.
        self.__k2i = db.DB(db_env)
        self.__k2i.set_flags(dbsetflags)
        self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode)

        self.__i2k = db.DB(db_env)
        self.__i2k.set_flags(dbsetflags)
        self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode)

        # Background daemon thread that flushes dirty DBs (see __sync_run).
        self.__needs_sync = False
        t = Thread(target=self.__sync_run)
        t.setDaemon(True)
        t.start()
        self.__sync_thread = t
        return VALID_STORE


    def __sync_run(self):
        """Daemon loop: sync() after >=10s of write quiet time or 300s max."""
        from time import sleep, time
        try:
            min_seconds, max_seconds = 10, 300
            while self.__open:
                if self.__needs_sync:
                    t0 = t1 = time()
                    self.__needs_sync = False
                    while self.__open:
                        sleep(.1)
                        if self.__needs_sync:
                            # More writes arrived: restart the quiet timer.
                            t1 = time()
                            self.__needs_sync = False
                        if time()-t1 > min_seconds or time()-t0 > max_seconds:
                            self.__needs_sync = False
                            _logger.debug("sync")
                            self.sync()
                            break
                else:
                    sleep(1)
        except Exception, e:
            _logger.exception(e)

    def sync(self):
        """Flush all databases to disk."""
        if self.__open:
            for i in self.__indicies:
                i.sync()
            self.__contexts.sync()
            self.__namespace.sync()
            self.__prefix.sync()
            self.__i2k.sync()
            self.__k2i.sync()

    def close(self, commit_pending_transaction=False):
        """Stop the sync thread and close every database and the environment."""
        self.__open = False
        self.__sync_thread.join()
        for i in self.__indicies:
            i.close()
        self.__contexts.close()
        self.__namespace.close()
        self.__prefix.close()
        self.__i2k.close()
        self.__k2i.close()
        self.db_env.close()

    def add(self, (subject, predicate, object), context, quoted=False, txn=None):
        """\
        Add a triple to the store of triples.
        """
        assert self.__open, "The Store must be open."
        assert context!=self, "Can not add triple directly to store"
        Store.add(self, (subject, predicate, object), context, quoted)

        _to_string = self._to_string

        s = _to_string(subject, txn=txn)
        p = _to_string(predicate, txn=txn)
        o = _to_string(object, txn=txn)
        c = _to_string(context, txn=txn)

        cspo, cpos, cosp = self.__indicies

        # Only insert when the triple is not already present in this context.
        value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
        if value is None:
            self.__contexts.put(bb(c), "", txn=txn)

            # The ""-context (conjunctive) entry's value lists every context
            # that contains this triple.
            contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("")
            contexts = set(contexts_value.split(b("^")))
            contexts.add(bb(c))
            contexts_value = b("^").join(contexts)
            assert contexts_value!=None

            cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn)
            cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn)
            cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn)
            if not quoted:
                cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn)
                cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn)
                cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn)

            self.__needs_sync = True

    def __remove(self, (s, p, o), c, quoted=False, txn=None):
        """Delete one triple from context *c*, maintaining the conjunctive
        (""-context) entries' context lists.  s/p/o/c are encoded ids."""
        cspo, cpos, cosp = self.__indicies
        contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("")
        contexts = set(contexts_value.split(b("^")))
        contexts.discard(c)
        contexts_value = b("^").join(contexts)
        for i, _to_key, _from_key in self.__indicies_info:
            i.delete(_to_key((s, p, o), c), txn=txn)
        if not quoted:
            if contexts_value:
                for i, _to_key, _from_key in self.__indicies_info:
                    i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn)
            else:
                # Triple is in no context any more: drop the conjunctive rows.
                for i, _to_key, _from_key in self.__indicies_info:
                    try:
                        i.delete(_to_key((s, p, o), b("")), txn=txn)
                    except db.DBNotFoundError, e:
                        pass # TODO: is it okay to ignore these?

    def remove(self, (subject, predicate, object), context, txn=None):
        """Remove matching triples; None components act as wildcards."""
        assert self.__open, "The Store must be open."
        Store.remove(self, (subject, predicate, object), context)
        _to_string = self._to_string

        if context is not None:
            if context == self:
                # The store itself stands for the conjunctive graph.
                context = None

        if subject is not None and predicate is not None and object is not None and context is not None:
            # Fully bound pattern: a single direct delete.
            s = _to_string(subject, txn=txn)
            p = _to_string(predicate, txn=txn)
            o = _to_string(object, txn=txn)
            c = _to_string(context, txn=txn)
            value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
            if value is not None:
                self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn)
                self.__needs_sync = True
        else:
            # Wildcard pattern: walk the best index with a cursor.
            cspo, cpos, cosp = self.__indicies
            index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)

            cursor = index.cursor(txn=txn)
            try:
                current = cursor.set_range(prefix)
                needs_sync = True
            except db.DBNotFoundError:
                current = None
                needs_sync = False
            cursor.close()
            while current:
                key, value = current
                # Re-open a cursor per step so deletes don't invalidate it.
                cursor = index.cursor(txn=txn)
                try:
                    cursor.set_range(key)
                    # Hack to stop 2to3 converting this to next(cursor)
                    current = getattr(cursor, 'next')()
                except db.DBNotFoundError:
                    current = None
                cursor.close()
                if key.startswith(prefix):
                    c, s, p, o = from_key(key)
                    if context is None:
                        contexts_value = index.get(key, txn=txn) or b("")
                        contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts
                        contexts.add(b("")) # and from the conjunctive index
                        for c in contexts:
                            for i, _to_key, _ in self.__indicies_info:
                                i.delete(_to_key((s, p, o), c), txn=txn)
                    else:
                        self.__remove((s, p, o), c, txn=txn)
                else:
                    break

            if context is not None:
                if subject is None and predicate is None and object is None:
                    # TODO: also if context becomes empty and not just on remove((None, None, None), c)
                    try:
                        self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn)
                    except db.DBNotFoundError, e:
                        pass

            self.__needs_sync = needs_sync

    def triples(self, (subject, predicate,
object), context=None, txn=None): + """A generator over all the triples matching """ + assert self.__open, "The Store must be open." + + if context is not None: + if context == self: + context = None + + _from_string = self._from_string + index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) + + cursor = index.cursor(txn=txn) + try: + current = cursor.set_range(prefix) + except db.DBNotFoundError: + current = None + cursor.close() + while current: + key, value = current + cursor = index.cursor(txn=txn) + try: + cursor.set_range(key) + # Cheap hack so 2to3 doesn't convert to next(cursor) + current = getattr(cursor, 'next')() + except db.DBNotFoundError: + current = None + cursor.close() + if key and key.startswith(prefix): + contexts_value = index.get(key, txn=txn) + yield results_from_key(key, subject, predicate, object, contexts_value) + else: + break + + def __len__(self, context=None): + assert self.__open, "The Store must be open." + if context is not None: + if context == self: + context = None + + if context is None: + prefix = b("^") + else: + prefix = bb("%s^" % self._to_string(context)) + + index = self.__indicies[0] + cursor = index.cursor() + current = cursor.set_range(prefix) + count = 0 + while current: + key, value = current + if key.startswith(prefix): + count +=1 + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + else: + break + cursor.close() + return count + + def bind(self, prefix, namespace): + prefix = prefix.encode("utf-8") + namespace = namespace.encode("utf-8") + bound_prefix = self.__prefix.get(namespace) + if bound_prefix: + self.__namespace.delete(bound_prefix) + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + prefix = prefix.encode("utf-8") + ns = self.__namespace.get(prefix, None) + if ns is not None: + return ns.decode('utf-8') + return None + + def prefix(self, namespace): + 
namespace = namespace.encode("utf-8") + prefix = self.__prefix.get(namespace, None) + if prefix is not None: + return prefix.decode('utf-8') + return None + + def namespaces(self): + cursor = self.__namespace.cursor() + results = [] + current = cursor.first() + while current: + prefix, namespace = current + results.append((prefix.decode('utf-8'), namespace.decode('utf-8'))) + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + cursor.close() + for prefix, namespace in results: + yield prefix, URIRef(namespace) + + def contexts(self, triple=None): + _from_string = self._from_string + _to_string = self._to_string + + if triple: + s, p, o = triple + s = _to_string(s) + p = _to_string(p) + o = _to_string(o) + contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o))) + if contexts: + for c in contexts.split(b("^")): + if c: + yield _from_string(c) + else: + index = self.__contexts + cursor = index.cursor() + current = cursor.first() + cursor.close() + while current: + key, value = current + context = _from_string(key) + yield context + cursor = index.cursor() + try: + cursor.set_range(key) + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + except db.DBNotFoundError: + current = None + cursor.close() + + def _from_string(self, i): + k = self.__i2k.get(int(i)) + return self._loads(k) + + def _to_string(self, term, txn=None): + k = self._dumps(term) + i = self.__k2i.get(k, txn=txn) + if i is None: + # weird behavoir from bsddb not taking a txn as a keyword argument + # for append + if self.transaction_aware: + i = "%s" % self.__i2k.append(k, txn) + else: + i = "%s" % self.__i2k.append(k) + + self.__k2i.put(k, i, txn=txn) + else: + i = i.decode() + return i + + def __lookup(self, (subject, predicate, object), context, txn=None): + _to_string = self._to_string + if context is not None: + context = _to_string(context, txn=txn) + i = 0 + if subject is not None: + i += 1 + subject = 
_to_string(subject, txn=txn) + if predicate is not None: + i += 2 + predicate = _to_string(predicate, txn=txn) + if object is not None: + i += 4 + object = _to_string(object, txn=txn) + index, prefix_func, from_key, results_from_key = self.__lookup_dict[i] + #print (subject, predicate, object), context, prefix_func, index #DEBUG + prefix = bb("^".join(prefix_func((subject, predicate, object), context))) + return index, prefix, from_key, results_from_key + + +def to_key_func(i): + def to_key(triple, context): + "Takes a string; returns key" + return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^ + return to_key + +def from_key_func(i): + def from_key(key): + "Takes a key; returns string" + parts = key.split(b("^")) + return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1] + return from_key + +def results_from_key_func(i, from_string): + def from_key(key, subject, predicate, object, contexts_value): + "Takes a key and subject, predicate, object; returns tuple for yield" + parts = key.split(b("^")) + if subject is None: + # TODO: i & 1: # dis assemble and/or measure to see which is faster + # subject is None or i & 1 + s = from_string(parts[(3-i+0)%3+1]) + else: + s = subject + if predicate is None:#i & 2: + p = from_string(parts[(3-i+1)%3+1]) + else: + p = predicate + if object is None:#i & 4: + o = from_string(parts[(3-i+2)%3+1]) + else: + o = object + return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c) + return from_key + +def readable_index(i): + s, p, o = "?" 
* 3 + if i & 1: s = "s" + if i & 2: p = "p" + if i & 4: o = "o" + return "%s,%s,%s" % (s, p, o) diff --git a/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~ new file mode 100644 index 0000000..67fcc17 --- /dev/null +++ b/creactistore/_templates/lib/rdflib_/plugins/sleepycat.py~ @@ -0,0 +1,531 @@ +from rdflib.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN +from rdflib.term import URIRef +from rdflib.py3compat import b +def bb(u): return u.encode('utf-8') + +try: + from bsddb import db + has_bsddb = True +except ImportError: + try: + from bsddb3 import db + has_bsddb = True + except ImportError: + has_bsddb = False +from os import mkdir +from os.path import exists, abspath +from urllib import pathname2url +from threading import Thread + +import logging +_logger = logging.getLogger(__name__) + +__all__ = ['Sleepycat'] + +class Sleepycat(Store): + context_aware = True + formula_aware = True + transaction_aware = False + db_env = None + + def __init__(self, configuration=None, identifier=None): + if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.") + self.__open = False + self.__identifier = identifier + super(Sleepycat, self).__init__(configuration) + self._loads = self.node_pickler.loads + self._dumps = self.node_pickler.dumps + + def __get_identifier(self): + return self.__identifier + identifier = property(__get_identifier) + + def _init_db_environment(self, homeDir, create=True): + envsetflags = db.DB_CDB_ALLDB + envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD + if not exists(homeDir): + if create==True: + mkdir(homeDir) # TODO: implement create method and refactor this to it + self.create(homeDir) + else: + return NO_STORE + db_env = db.DBEnv() + db_env.set_cachesize(0, 1024*1024*50) # TODO + #db_env.set_lg_max(1024*1024) + db_env.set_flags(envsetflags, 1) + db_env.open(homeDir, envflags | db.DB_CREATE) + return db_env + + def 
is_open(self): + return self.__open + + def open(self, path, create=True): + if not has_bsddb: return NO_STORE + homeDir = path + + if self.__identifier is None: + self.__identifier = URIRef(pathname2url(abspath(homeDir))) + + db_env = self._init_db_environment(homeDir, create) + if db_env == NO_STORE: + return NO_STORE + self.db_env = db_env + self.__open = True + + dbname = None + dbtype = db.DB_BTREE + # auto-commit ensures that the open-call commits when transactions are enabled + dbopenflags = db.DB_THREAD + if self.transaction_aware == True: + dbopenflags |= db.DB_AUTO_COMMIT + + dbmode = 0660 + dbsetflags = 0 + + # create and open the DBs + self.__indicies = [None,] * 3 + self.__indicies_info = [None,] * 3 + for i in xrange(0, 3): + index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode() + index = db.DB(db_env) + index.set_flags(dbsetflags) + index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) + self.__indicies[i] = index + self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i)) + + lookup = {} + for i in xrange(0, 8): + results = [] + for start in xrange(0, 3): + score = 1 + len = 0 + for j in xrange(start, start+3): + if i & (1<<(j%3)): + score = score << 1 + len += 1 + else: + break + tie_break = 2-start + results.append(((score, tie_break), start, len)) + + results.sort() + score, start, len = results[-1] + + def get_prefix_func(start, end): + def get_prefix(triple, context): + if context is None: + yield "" + else: + yield context + i = start + while i<end: + yield triple[i%3] + i += 1 + yield "" + return get_prefix + + lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string)) + + + self.__lookup_dict = lookup + + self.__contexts = db.DB(db_env) + self.__contexts.set_flags(dbsetflags) + self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) + + self.__namespace = db.DB(db_env) + 
self.__namespace.set_flags(dbsetflags) + self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) + + self.__prefix = db.DB(db_env) + self.__prefix.set_flags(dbsetflags) + self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) + + self.__k2i = db.DB(db_env) + self.__k2i.set_flags(dbsetflags) + self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode) + + self.__i2k = db.DB(db_env) + self.__i2k.set_flags(dbsetflags) + self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode) + + self.__needs_sync = False + t = Thread(target=self.__sync_run) + t.setDaemon(True) + t.start() + self.__sync_thread = t + return VALID_STORE + + + def __sync_run(self): + from time import sleep, time + try: + min_seconds, max_seconds = 10, 300 + while self.__open: + if self.__needs_sync: + t0 = t1 = time() + self.__needs_sync = False + while self.__open: + sleep(.1) + if self.__needs_sync: + t1 = time() + self.__needs_sync = False + if time()-t1 > min_seconds or time()-t0 > max_seconds: + self.__needs_sync = False + _logger.debug("sync") + self.sync() + break + else: + sleep(1) + except Exception, e: + _logger.exception(e) + + def sync(self): + if self.__open: + for i in self.__indicies: + i.sync() + self.__contexts.sync() + self.__namespace.sync() + self.__prefix.sync() + self.__i2k.sync() + self.__k2i.sync() + + def close(self, commit_pending_transaction=False): + self.__open = False + self.__sync_thread.join() + for i in self.__indicies: + i.close() + self.__contexts.close() + self.__namespace.close() + self.__prefix.close() + self.__i2k.close() + self.__k2i.close() + self.db_env.close() + + def add(self, (subject, predicate, object), context, quoted=False, txn=None): + """\ + Add a triple to the store of triples. + """ + assert self.__open, "The Store must be open." 
+ assert context!=self, "Can not add triple directly to store" + Store.add(self, (subject, predicate, object), context, quoted) + + _to_string = self._to_string + + s = _to_string(subject, txn=txn) + p = _to_string(predicate, txn=txn) + o = _to_string(object, txn=txn) + c = _to_string(context, txn=txn) + + cspo, cpos, cosp = self.__indicies + + value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) + if value is None: + self.__contexts.put(bb(c), "", txn=txn) + + contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("") + contexts = set(contexts_value.split(b("^"))) + contexts.add(bb(c)) + contexts_value = b("^").join(contexts) + assert contexts_value!=None + + cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn) + cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn) + cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn) + if not quoted: + cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn) + cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn) + cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn) + + self.__needs_sync = True + + def __remove(self, (s, p, o), c, quoted=False, txn=None): + cspo, cpos, cosp = self.__indicies + contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("") + contexts = set(contexts_value.split(b("^"))) + contexts.discard(c) + contexts_value = b("^").join(contexts) + for i, _to_key, _from_key in self.__indicies_info: + i.delete(_to_key((s, p, o), c), txn=txn) + if not quoted: + if contexts_value: + for i, _to_key, _from_key in self.__indicies_info: + i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn) + else: + for i, _to_key, _from_key in self.__indicies_info: + try: + i.delete(_to_key((s, p, o), b("")), txn=txn) + except db.DBNotFoundError, e: + pass # TODO: is it okay to ignore these? + + def remove(self, (subject, predicate, object), context, txn=None): + assert self.__open, "The Store must be open." 
+ Store.remove(self, (subject, predicate, object), context) + _to_string = self._to_string + + if context is not None: + if context == self: + context = None + + if subject is not None and predicate is not None and object is not None and context is not None: + s = _to_string(subject, txn=txn) + p = _to_string(predicate, txn=txn) + o = _to_string(object, txn=txn) + c = _to_string(context, txn=txn) + value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) + if value is not None: + self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn) + self.__needs_sync = True + else: + cspo, cpos, cosp = self.__indicies + index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) + + cursor = index.cursor(txn=txn) + try: + current = cursor.set_range(prefix) + needs_sync = True + except db.DBNotFoundError: + current = None + needs_sync = False + cursor.close() + while current: + key, value = current + cursor = index.cursor(txn=txn) + try: + cursor.set_range(key) + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + except db.DBNotFoundError: + current = None + cursor.close() + if key.startswith(prefix): + c, s, p, o = from_key(key) + if context is None: + contexts_value = index.get(key, txn=txn) or b("") + contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts + contexts.add(b("")) # and from the conjunctive index + for c in contexts: + for i, _to_key, _ in self.__indicies_info: + i.delete(_to_key((s, p, o), c), txn=txn) + else: + self.__remove((s, p, o), c, txn=txn) + else: + break + + if context is not None: + if subject is None and predicate is None and object is None: + # TODO: also if context becomes empty and not just on remove((None, None, None), c) + try: + self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn) + except db.DBNotFoundError, e: + pass + + self.__needs_sync = needs_sync + + def triples(self, (subject, predicate, 
object), context=None, txn=None): + """A generator over all the triples matching """ + assert self.__open, "The Store must be open." + + if context is not None: + if context == self: + context = None + + _from_string = self._from_string + index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) + + cursor = index.cursor(txn=txn) + try: + current = cursor.set_range(prefix) + except db.DBNotFoundError: + current = None + cursor.close() + while current: + key, value = current + cursor = index.cursor(txn=txn) + try: + cursor.set_range(key) + # Cheap hack so 2to3 doesn't convert to next(cursor) + current = getattr(cursor, 'next')() + except db.DBNotFoundError: + current = None + cursor.close() + if key and key.startswith(prefix): + contexts_value = index.get(key, txn=txn) + yield results_from_key(key, subject, predicate, object, contexts_value) + else: + break + + def __len__(self, context=None): + assert self.__open, "The Store must be open." + if context is not None: + if context == self: + context = None + + if context is None: + prefix = b("^") + else: + prefix = bb("%s^" % self._to_string(context)) + + index = self.__indicies[0] + cursor = index.cursor() + current = cursor.set_range(prefix) + count = 0 + while current: + key, value = current + if key.startswith(prefix): + count +=1 + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + else: + break + cursor.close() + return count + + def bind(self, prefix, namespace): + prefix = prefix.encode("utf-8") + namespace = namespace.encode("utf-8") + bound_prefix = self.__prefix.get(namespace) + if bound_prefix: + self.__namespace.delete(bound_prefix) + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + + def namespace(self, prefix): + prefix = prefix.encode("utf-8") + ns = self.__namespace.get(prefix, None) + if ns is not None: + return ns.decode('utf-8') + return None + + def prefix(self, namespace): + 
namespace = namespace.encode("utf-8") + prefix = self.__prefix.get(namespace, None) + if prefix is not None: + return prefix.decode('utf-8') + return None + + def namespaces(self): + cursor = self.__namespace.cursor() + results = [] + current = cursor.first() + while current: + prefix, namespace = current + results.append((prefix.decode('utf-8'), namespace.decode('utf-8'))) + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + cursor.close() + for prefix, namespace in results: + yield prefix, URIRef(namespace) + + def contexts(self, triple=None): + _from_string = self._from_string + _to_string = self._to_string + + if triple: + s, p, o = triple + s = _to_string(s) + p = _to_string(p) + o = _to_string(o) + contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o))) + if contexts: + for c in contexts.split(b("^")): + if c: + yield _from_string(c) + else: + index = self.__contexts + cursor = index.cursor() + current = cursor.first() + cursor.close() + while current: + key, value = current + context = _from_string(key) + yield context + cursor = index.cursor() + try: + cursor.set_range(key) + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, 'next')() + except db.DBNotFoundError: + current = None + cursor.close() + + def _from_string(self, i): + k = self.__i2k.get(int(i)) + return self._loads(k) + + def _to_string(self, term, txn=None): + k = self._dumps(term) + i = self.__k2i.get(k, txn=txn) + if i is None: + # weird behavoir from bsddb not taking a txn as a keyword argument + # for append + if self.transaction_aware: + i = "%s" % self.__i2k.append(k, txn) + else: + i = "%s" % self.__i2k.append(k) + + self.__k2i.put(k, i, txn=txn) + else: + i = i.decode() + return i + + def __lookup(self, (subject, predicate, object), context, txn=None): + _to_string = self._to_string + if context is not None: + context = _to_string(context, txn=txn) + i = 0 + if subject is not None: + i += 1 + subject = 
_to_string(subject, txn=txn) + if predicate is not None: + i += 2 + predicate = _to_string(predicate, txn=txn) + if object is not None: + i += 4 + object = _to_string(object, txn=txn) + index, prefix_func, from_key, results_from_key = self.__lookup_dict[i] + #print (subject, predicate, object), context, prefix_func, index #DEBUG + prefix = bb("^".join(prefix_func((subject, predicate, object), context))) + return index, prefix, from_key, results_from_key + + +def to_key_func(i): + def to_key(triple, context): + "Takes a string; returns key" + return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^ + return to_key + +def from_key_func(i): + def from_key(key): + "Takes a key; returns string" + parts = key.split(b("^")) + return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1] + return from_key + +def results_from_key_func(i, from_string): + def from_key(key, subject, predicate, object, contexts_value): + "Takes a key and subject, predicate, object; returns tuple for yield" + parts = key.split(b("^")) + if subject is None: + # TODO: i & 1: # dis assemble and/or measure to see which is faster + # subject is None or i & 1 + s = from_string(parts[(3-i+0)%3+1]) + else: + s = subject + if predicate is None:#i & 2: + p = from_string(parts[(3-i+1)%3+1]) + else: + p = predicate + if object is None:#i & 4: + o = from_string(parts[(3-i+2)%3+1]) + else: + o = object + return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c) + return from_key + +def readable_index(i): + s, p, o = "?" * 3 + if i & 1: s = "s" + if i & 2: p = "p" + if i & 4: o = "o" + return "%s,%s,%s" % (s, p, o) |