diff options
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins')
26 files changed, 0 insertions, 6933 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/__init__.py b/creactistore/_templates/lib/rdflib/plugins/__init__.py deleted file mode 100644 index 4622bb0..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Default plugins for rdflib. - -This is a namespace package and contains the default plugins for -rdflib. - -""" diff --git a/creactistore/_templates/lib/rdflib/plugins/memory.py b/creactistore/_templates/lib/rdflib/plugins/memory.py deleted file mode 100644 index 3a9d9f8..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/memory.py +++ /dev/null @@ -1,563 +0,0 @@ -from __future__ import generators -from rdflib.term import BNode -from rdflib.store import Store, NO_STORE, VALID_STORE - -__all__ = ['Memory', 'IOMemory'] - -ANY = Any = None - -class Memory(Store): - """\ - An in memory implementation of a triple store. - - This triple store uses nested dictionaries to store triples. Each - triple is stored in two such indices as follows spo[s][p][o] = 1 and - pos[p][o][s] = 1. - - Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser - """ - def __init__(self, configuration=None, identifier=None): - super(Memory, self).__init__(configuration) - self.identifier = identifier - - # indexed by [subject][predicate][object] - self.__spo = {} - - # indexed by [predicate][object][subject] - self.__pos = {} - - # indexed by [predicate][object][subject] - self.__osp = {} - - self.__namespace = {} - self.__prefix = {} - - def add(self, (subject, predicate, object), context, quoted=False): - """\ - Add a triple to the store of triples. - """ - # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] - # = 1, creating the nested dictionaries where they do not yet - # exits. 
- spo = self.__spo - try: - po = spo[subject] - except: - po = spo[subject] = {} - try: - o = po[predicate] - except: - o = po[predicate] = {} - o[object] = 1 - - pos = self.__pos - try: - os = pos[predicate] - except: - os = pos[predicate] = {} - try: - s = os[object] - except: - s = os[object] = {} - s[subject] = 1 - - osp = self.__osp - try: - sp = osp[object] - except: - sp = osp[object] = {} - try: - p = sp[subject] - except: - p = sp[subject] = {} - p[predicate] = 1 - - def remove(self, (subject, predicate, object), context=None): - for (subject, predicate, object), c in self.triples( - (subject, predicate, object)): - del self.__spo[subject][predicate][object] - del self.__pos[predicate][object][subject] - del self.__osp[object][subject][predicate] - - def triples(self, (subject, predicate, object), context=None): - """A generator over all the triples matching """ - if subject!=ANY: # subject is given - spo = self.__spo - if subject in spo: - subjectDictionary = spo[subject] - if predicate!=ANY: # subject+predicate is given - if predicate in subjectDictionary: - if object!=ANY: # subject+predicate+object is given - if object in subjectDictionary[predicate]: - yield (subject, predicate, object), \ - self.__contexts() - else: # given object not found - pass - else: # subject+predicate is given, object unbound - for o in subjectDictionary[predicate].keys(): - yield (subject, predicate, o), \ - self.__contexts() - else: # given predicate not found - pass - else: # subject given, predicate unbound - for p in subjectDictionary.keys(): - if object!=ANY: # object is given - if object in subjectDictionary[p]: - yield (subject, p, object), self.__contexts() - else: # given object not found - pass - else: # object unbound - for o in subjectDictionary[p].keys(): - yield (subject, p, o), self.__contexts() - else: # given subject not found - pass - elif predicate!=ANY: # predicate is given, subject unbound - pos = self.__pos - if predicate in pos: - predicateDictionary = 
pos[predicate] - if object!=ANY: # predicate+object is given, subject unbound - if object in predicateDictionary: - for s in predicateDictionary[object].keys(): - yield (s, predicate, object), self.__contexts() - else: # given object not found - pass - else: # predicate is given, object+subject unbound - for o in predicateDictionary.keys(): - for s in predicateDictionary[o].keys(): - yield (s, predicate, o), self.__contexts() - elif object!=ANY: # object is given, subject+predicate unbound - osp = self.__osp - if object in osp: - objectDictionary = osp[object] - for s in objectDictionary.keys(): - for p in objectDictionary[s].keys(): - yield (s, p, object), self.__contexts() - else: # subject+predicate+object unbound - spo = self.__spo - for s in spo.keys(): - subjectDictionary = spo[s] - for p in subjectDictionary.keys(): - for o in subjectDictionary[p].keys(): - yield (s, p, o), self.__contexts() - - def __len__(self, context=None): - #@@ optimize - i = 0 - for triple in self.triples((None, None, None)): - i += 1 - return i - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.iteritems(): - yield prefix, namespace - - def __contexts(self): - return (c for c in []) # TODO: best way to return empty generator - -class IOMemory(Store): - """\ - An integer-key-optimized-context-aware-in-memory store. - - Uses nested dictionaries to store triples and context. Each triple - is stored in six such indices as follows cspo[c][s][p][o] = 1 - and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as - spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c] - - Context information is used to track the 'source' of the triple - data for merging, unmerging, remerging purposes. 
context aware - store stores consume more memory size than non context stores. - - """ - - context_aware = True - formula_aware = True - - def __init__(self, configuration=None, identifier=None): - super(IOMemory, self).__init__() - - # indexed by [context][subject][predicate][object] = 1 - self.cspo = self.createIndex() - - # indexed by [context][predicate][object][subject] = 1 - self.cpos = self.createIndex() - - # indexed by [context][object][subject][predicate] = 1 - self.cosp = self.createIndex() - - # indexed by [subject][predicate][object] = [context] - self.spo = self.createIndex() - - # indexed by [predicate][object][subject] = [context] - self.pos = self.createIndex() - - # indexed by [object][subject][predicate] = [context] - self.osp = self.createIndex() - - # indexes integer keys to identifiers - self.forward = self.createForward() - - # reverse index of forward - self.reverse = self.createReverse() - - self.identifier = identifier or BNode() - - self.__namespace = self.createPrefixMap() - self.__prefix = self.createPrefixMap() - - def open(self, configuration, create=False): - if not create: - # An IOMemory Store never exists. - return NO_STORE - else: - return VALID_STORE - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.iteritems(): - yield prefix, namespace - - def defaultContext(self): - return self.default_context - - def addContext(self, context): - """ Add context w/o adding statement. Dan you can remove this if you want """ - - if not self.reverse.has_key(context): - ci=randid() - while not self.forward.insert(ci, context): - ci=randid() - self.reverse[context] = ci - - def intToIdentifier(self, (si, pi, oi)): - """ Resolve an integer triple into identifers. 
""" - return (self.forward[si], self.forward[pi], self.forward[oi]) - - def identifierToInt(self, (s, p, o)): - """ Resolve an identifier triple into integers. """ - return (self.reverse[s], self.reverse[p], self.reverse[o]) - - def uniqueSubjects(self, context=None): - if context is None: - index = self.spo - else: - index = self.cspo[context] - for si in index.keys(): - yield self.forward[si] - - def uniquePredicates(self, context=None): - if context is None: - index = self.pos - else: - index = self.cpos[context] - for pi in index.keys(): - yield self.forward[pi] - - def uniqueObjects(self, context=None): - if context is None: - index = self.osp - else: - index = self.cosp[context] - for oi in index.keys(): - yield self.forward[oi] - - def createForward(self): - return {} - - def createReverse(self): - return {} - - def createIndex(self): - return {} - - def createPrefixMap(self): - return {} - - def add(self, triple, context, quoted=False): - """\ - Add a triple to the store. - """ - Store.add(self, triple, context, quoted) - for triple, cg in self.triples(triple, context): - #triple is already in the store. - return - - subject, predicate, object = triple - - f = self.forward - r = self.reverse - - # assign keys for new identifiers - - if not r.has_key(subject): - si=randid() - while f.has_key(si): - si=randid() - f[si] = subject - r[subject] = si - else: - si = r[subject] - - if not r.has_key(predicate): - pi=randid() - while f.has_key(pi): - pi=randid() - f[pi] = predicate - r[predicate] = pi - else: - pi = r[predicate] - - if not r.has_key(object): - oi=randid() - while f.has_key(oi): - oi=randid() - f[oi] = object - r[object] = oi - else: - oi = r[object] - - if not r.has_key(context): - ci=randid() - while f.has_key(ci): - ci=randid() - f[ci] = context - r[context] = ci - else: - ci = r[context] - - # add dictionary entries for cspo[c][s][p][o] = 1, - # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the - # nested {} where they do not yet exits. 
- self._setNestedIndex(self.cspo, ci, si, pi, oi) - self._setNestedIndex(self.cpos, ci, pi, oi, si) - self._setNestedIndex(self.cosp, ci, oi, si, pi) - - if not quoted: - self._setNestedIndex(self.spo, si, pi, oi, ci) - self._setNestedIndex(self.pos, pi, oi, si, ci) - self._setNestedIndex(self.osp, oi, si, pi, ci) - - def _setNestedIndex(self, index, *keys): - for key in keys[:-1]: - if not index.has_key(key): - index[key] = self.createIndex() - index = index[key] - index[keys[-1]] = 1 - - - def _removeNestedIndex(self, index, *keys): - """ Remove context from the list of contexts in a nested index. - - Afterwards, recursively remove nested indexes when they became empty. - """ - parents = [] - for key in keys[:-1]: - parents.append(index) - index = index[key] - del index[keys[-1]] - - n = len(parents) - for i in xrange(n): - index = parents[n-1-i] - key = keys[n-1-i] - if len(index[key]) == 0: - del index[key] - - def remove(self, triple, context=None): - Store.remove(self, triple, context) - if context is not None: - if context == self: - context = None - - f = self.forward - r = self.reverse - if context is None: - for triple, cg in self.triples(triple): - subject, predicate, object = triple - si, pi, oi = self.identifierToInt((subject, predicate, object)) - contexts = list(self.contexts(triple)) - for context in contexts: - ci = r[context] - del self.cspo[ci][si][pi][oi] - del self.cpos[ci][pi][oi][si] - del self.cosp[ci][oi][si][pi] - - self._removeNestedIndex(self.spo, si, pi, oi, ci) - self._removeNestedIndex(self.pos, pi, oi, si, ci) - self._removeNestedIndex(self.osp, oi, si, pi, ci) - # grr!! hafta ref-count these before you can collect them dumbass! 
- #del f[si], f[pi], f[oi] - #del r[subject], r[predicate], r[object] - else: - subject, predicate, object = triple - ci = r.get(context, None) - if ci: - for triple, cg in self.triples(triple, context): - si, pi, oi = self.identifierToInt(triple) - del self.cspo[ci][si][pi][oi] - del self.cpos[ci][pi][oi][si] - del self.cosp[ci][oi][si][pi] - - try: - self._removeNestedIndex(self.spo, si, pi, oi, ci) - self._removeNestedIndex(self.pos, pi, oi, si, ci) - self._removeNestedIndex(self.osp, oi, si, pi, ci) - except KeyError: - # the context may be a quoted one in which - # there will not be a triple in spo, pos or - # osp. So ignore any KeyErrors - pass - # TODO delete references to resources in self.forward/self.reverse - # that are not in use anymore... - - if subject is None and predicate is None and object is None: - # remove context - try: - ci = self.reverse[context] - del self.cspo[ci], self.cpos[ci], self.cosp[ci] - except KeyError: - # TODO: no exception when removing non-existant context? 
- pass - - - def triples(self, triple, context=None): - """A generator over all the triples matching """ - - if context is not None: - if context == self: - context = None - - subject, predicate, object = triple - ci = si = pi = oi = Any - - if context is None: - spo = self.spo - pos = self.pos - osp = self.osp - else: - try: - ci = self.reverse[context] # TODO: Really ignore keyerror here - spo = self.cspo[ci] - pos = self.cpos[ci] - osp = self.cosp[ci] - except KeyError: - return - try: - if subject is not Any: - si = self.reverse[subject] # throws keyerror if subject doesn't exist ;( - if predicate is not Any: - pi = self.reverse[predicate] - if object is not Any: - oi = self.reverse[object] - except KeyError, e: - return #raise StopIteration - - if si != Any: # subject is given - if spo.has_key(si): - subjectDictionary = spo[si] - if pi != Any: # subject+predicate is given - if subjectDictionary.has_key(pi): - if oi!= Any: # subject+predicate+object is given - if subjectDictionary[pi].has_key(oi): - ss, pp, oo = self.intToIdentifier((si, pi, oi)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # given object not found - pass - else: # subject+predicate is given, object unbound - for o in subjectDictionary[pi].keys(): - ss, pp, oo = self.intToIdentifier((si, pi, o)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # given predicate not found - pass - else: # subject given, predicate unbound - for p in subjectDictionary.keys(): - if oi != Any: # object is given - if subjectDictionary[p].has_key(oi): - ss, pp, oo = self.intToIdentifier((si, p, oi)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # given object not found - pass - else: # object unbound - for o in subjectDictionary[p].keys(): - ss, pp, oo = self.intToIdentifier((si, p, o)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # given subject not found - pass - elif pi != Any: # predicate is given, subject unbound - 
if pos.has_key(pi): - predicateDictionary = pos[pi] - if oi != Any: # predicate+object is given, subject unbound - if predicateDictionary.has_key(oi): - for s in predicateDictionary[oi].keys(): - ss, pp, oo = self.intToIdentifier((s, pi, oi)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # given object not found - pass - else: # predicate is given, object+subject unbound - for o in predicateDictionary.keys(): - for s in predicateDictionary[o].keys(): - ss, pp, oo = self.intToIdentifier((s, pi, o)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - elif oi != Any: # object is given, subject+predicate unbound - if osp.has_key(oi): - objectDictionary = osp[oi] - for s in objectDictionary.keys(): - for p in objectDictionary[s].keys(): - ss, pp, oo = self.intToIdentifier((s, p, oi)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - else: # subject+predicate+object unbound - for s in spo.keys(): - subjectDictionary = spo[s] - for p in subjectDictionary.keys(): - for o in subjectDictionary[p].keys(): - ss, pp, oo = self.intToIdentifier((s, p, o)) - yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo))) - - def __len__(self, context=None): - - if context is not None: - if context == self: - context = None - - # TODO: for eff. 
implementation - count = 0 - for triple, cg in self.triples((Any, Any, Any), context): - count += 1 - return count - - def contexts(self, triple=None): - if triple: - si, pi, oi = self.identifierToInt(triple) - for ci in self.spo[si][pi][oi]: - yield self.forward[ci] - else: - for ci in self.cspo.keys(): - yield self.forward[ci] - - - - -import random - -def randid(randint=random.randint, choice=random.choice, signs=(-1,1)): - return choice(signs)*randint(1,2000000000) - -del random diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py deleted file mode 100644 index 8062daa..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" - -""" diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py deleted file mode 100644 index ac48340..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py +++ /dev/null @@ -1,2314 +0,0 @@ -#!/usr/bin/env python -u""" -notation3.py - Standalone Notation3 Parser -Derived from CWM, the Closed World Machine - -Authors of the original suite: - -* Dan Connolly <@@> -* Tim Berners-Lee <@@> -* Yosi Scharf <@@> -* Joseph M. Reagle Jr. <reagle@w3.org> -* Rich Salz <rsalz@zolera.com> - -http://www.w3.org/2000/10/swap/notation3.py - -Copyright 2000-2007, World Wide Web Consortium. -Copyright 2001, MIT. -Copyright 2001, Zolera Systems Inc. - -License: W3C Software License -http://www.w3.org/Consortium/Legal/copyright-software - -Modified by Sean B. Palmer -Copyright 2007, Sean B. Palmer. \u32E1 - -Modified to work with rdflib by Gunnar Aastrand Grimnes -Copyright 2010, Gunnar A. 
Grimnes - -""" - -# Python standard libraries -import types -import sys -import os -import string -import re -import time -import StringIO -import codecs - -from binascii import a2b_hex -from decimal import Decimal - -from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id -from rdflib.graph import QuotedGraph, ConjunctiveGraph -from rdflib import py3compat -b = py3compat.b - -__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"] - -from rdflib.parser import Parser - -# Incestuous.. would be nice to separate N3 and XML -# from sax2rdf import XMLtoDOM -def XMLtoDOM(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - -# SWAP http://www.w3.org/2000/10/swap -# from diag import verbosity, setVerbosity, progress -def verbosity(*args, **kargs): - # print >> sys.stderr, args, kargs - pass -def setVerbosity(*args, **kargs): - # print >> sys.stderr, args, kargs - pass -def progress(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - - - -def splitFrag(uriref): - """split a URI reference between the fragment and the rest. - - Punctuation is thrown away. - - e.g. - - >>> splitFrag("abc#def") - ('abc', 'def') - - >>> splitFrag("abcdef") - ('abcdef', None) - - """ - - i = uriref.rfind("#") - if i>= 0: return uriref[:i], uriref[i+1:] - else: return uriref, None - -def splitFragP(uriref, punct=0): - """split a URI reference before the fragment - - Punctuation is kept. - - e.g. 
- - >>> splitFragP("abc#def") - ('abc', '#def') - - >>> splitFragP("abcdef") - ('abcdef', '') - - """ - - i = uriref.rfind("#") - if i>= 0: return uriref[:i], uriref[i:] - else: return uriref, '' - -@py3compat.format_doctest_out -def join(here, there): - """join an absolute URI and URI reference - (non-ascii characters are supported/doctested; - haven't checked the details of the IRI spec though) - - here is assumed to be absolute. - there is URI reference. - - >>> join('http://example/x/y/z', '../abc') - 'http://example/x/abc' - - Raise ValueError if there uses relative path - syntax but here has no hierarchical path. - - >>> join('mid:foo@example', '../foo') - Traceback (most recent call last): - raise ValueError, here - ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'. - - >>> join('http://example/x/y/z', '') - 'http://example/x/y/z' - - >>> join('mid:foo@example', '#foo') - 'mid:foo@example#foo' - - We grok IRIs - - >>> len(u'Andr\\xe9') - 5 - - >>> join('http://example.org/', u'#Andr\\xe9') - %(u)s'http://example.org/#Andr\\xe9' - """ - - assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?) - - slashl = there.find('/') - colonl = there.find(':') - - # join(base, 'foo:/') -- absolute - if colonl >= 0 and (slashl < 0 or colonl < slashl): - return there - - bcolonl = here.find(':') - assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute - - path, frag = splitFragP(there) - if not path: return here + frag - - # join('mid:foo@example', '../foo') bzzt - if here[bcolonl+1:bcolonl+2] <> '/': - raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." 
%(here, there)) - - if here[bcolonl+1:bcolonl+3] == '//': - bpath = here.find('/', bcolonl+3) - else: - bpath = bcolonl+1 - - # join('http://xyz', 'foo') - if bpath < 0: - bpath = len(here) - here = here + '/' - - # join('http://xyz/', '//abc') => 'http://abc' - if there[:2] == '//': - return here[:bcolonl+1] + there - - # join('http://xyz/', '/abc') => 'http://xyz/abc' - if there[:1] == '/': - return here[:bpath] + there - - slashr = here.rfind('/') - - while 1: - if path[:2] == './': - path = path[2:] - if path == '.': - path = '' - elif path[:3] == '../' or path == '..': - path = path[3:] - i = here.rfind('/', bpath, slashr) - if i >= 0: - here = here[:i+1] - slashr = i - else: - break - - return here[:slashr+1] + path + frag - -commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$') - -def refTo(base, uri): - """figure out a relative URI reference from base to uri - - >>> refTo('http://example/x/y/z', 'http://example/x/abc') - '../abc' - - >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s') - 'q/r#s' - - >>> refTo(None, 'http://ex/x/y') - 'http://ex/x/y' - - >>> refTo('http://ex/x/y', 'http://ex/x/y') - '' - - Note the relationship between refTo and join: - join(x, refTo(x, y)) == y - which points out certain strings which cannot be URIs. e.g. - >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y - 0 - - So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead: - >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y - 1 - - This one checks that it uses a root-realtive one where that is - all they share. Now uses root-relative where no path is shared. - This is a matter of taste but tends to give more resilience IMHO - -- and shorter paths - - Note that base may be None, meaning no base. In some situations, there - just ain't a base. Slife. In these cases, relTo returns the absolute value. - The axiom abs(,rel(b,x))=x still holds. - This saves people having to set the base to "bogus:". 
- - >>> refTo('http://ex/x/y/z', 'http://ex/r') - '/r' - - """ - -# assert base # don't mask bugs -danc # not a bug. -tim - if not base: return uri - if base == uri: return "" - - # Find how many path segments in common - i=0 - while i<len(uri) and i<len(base): - if uri[i] == base[i]: i = i + 1 - else: break - # print "# relative", base, uri, " same up to ", i - # i point to end of shortest one or first difference - - m = commonHost.match(base[:i]) - if m: - k=uri.find("//") - if k<0: k=-2 # no host - l=uri.find("/", k+2) - if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]: - return uri[l:] - - if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base - - while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash - - if i < 3: return uri # No way. - if base.find("//", i-2)>0 \ - or uri.find("//", i-2)>0: return uri # An unshared "//" - if base.find(":", i)>0: return uri # An unshared ":" - n = base.count("/", i) - if n == 0 and i<len(uri) and uri[i] == '#': - return "./" + uri[i:] - elif n == 0 and i == len(uri): - return "./" - else: - return ("../" * n) + uri[i:] - - -def base(): - """The base URI for this process - the Web equiv of cwd - - Relative or abolute unix-standard filenames parsed relative to - this yeild the URI of the file. - If we had a reliable way of getting a computer name, - we should put it in the hostname just to prevent ambiguity - - """ -# return "file://" + hostname + os.getcwd() + "/" - return "file://" + _fixslash(os.getcwd()) + "/" - - -def _fixslash(str): - """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" - s = str - for i in range(len(s)): - if s[i] == "\\": s = s[:i] + "/" + s[i+1:] - if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present - return s - -URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~") - # unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" - -@py3compat.format_doctest_out -def canonical(str_in): - """Convert equivalent URIs (or parts) to the same string - - There are many differenet levels of URI canonicalization - which are possible. See http://www.ietf.org/rfc/rfc3986.txt - Done: - - Converfting unicode IRI to utf-8 - - Escaping all non-ASCII - - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39), - hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4) - - Making all escapes uppercase hexadecimal - - Not done: - - Making URI scheme lowercase - - changing /./ or /foo/../ to / with care not to change host part - - - >>> canonical("foo bar") - %(b)s'foo%%20bar' - - >>> canonical(u'http:') - %(b)s'http:' - - >>> canonical('fran%%c3%%83%%c2%%a7ois') - %(b)s'fran%%C3%%83%%C2%%A7ois' - - >>> canonical('a') - %(b)s'a' - - >>> canonical('%%4e') - %(b)s'N' - - >>> canonical('%%9d') - %(b)s'%%9D' - - >>> canonical('%%2f') - %(b)s'%%2F' - - >>> canonical('%%2F') - %(b)s'%%2F' - - """ - if type(str_in) == type(u''): - s8 = str_in.encode('utf-8') - else: - s8 = str_in - s = b('') - i = 0 - while i < len(s8): - if py3compat.PY3: - n = s8[i]; ch = bytes([n]) - else: - ch = s8[i]; n = ord(ch) - if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8 - s += b("%%%02X" % ord(ch)) - elif ch == b('%') and i+2 < len(s8): - ch2 = a2b_hex(s8[i+1:i+3]) - if ch2 in URI_unreserved: s += ch2 - else: s += b("%%%02X" % ord(ch2)) - i = i+3 - continue - else: - s += ch - i = i +1 - return s - - - - - - -CONTEXT = 0 -PRED = 1 -SUBJ = 2 -OBJ = 3 - -PARTS = PRED, SUBJ, OBJ -ALL4 = CONTEXT, PRED, SUBJ, OBJ - -SYMBOL = 0 -FORMULA = 1 -LITERAL = 2 -LITERAL_DT = 21 -LITERAL_LANG = 22 -ANONYMOUS = 3 -XMLLITERAL = 25 - -Logic_NS = "http://www.w3.org/2000/10/swap/log#" -NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging -forSomeSym = Logic_NS + "forSome" -forAllSym = Logic_NS + "forAll" - -RDF_type_URI = 
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type" -RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -OWL_NS = "http://www.w3.org/2002/07/owl#" -DAML_sameAs_URI = OWL_NS+"sameAs" -parsesTo_URI = Logic_NS + "parsesTo" -RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" - -List_NS = RDF_NS_URI # From 20030808 -_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" - -N3_first = (SYMBOL, List_NS + "first") -N3_rest = (SYMBOL, List_NS + "rest") -N3_li = (SYMBOL, List_NS + "li") -N3_nil = (SYMBOL, List_NS + "nil") -N3_List = (SYMBOL, List_NS + "List") -N3_Empty = (SYMBOL, List_NS + "Empty") - - - -runNamespaceValue = None - -def runNamespace(): - "Return a URI suitable as a namespace for run-local objects" - # @@@ include hostname (privacy?) (hash it?) - global runNamespaceValue - if runNamespaceValue == None: - runNamespaceValue = join(base(), _unique_id()) + '#' - return runNamespaceValue - -nextu = 0 -def uniqueURI(): - "A unique URI" - global nextu - nextu += 1 - return runNamespace() + "u_" + `nextu` - -class URISyntaxError(ValueError): - """A parameter is passed to a routine that requires a URI reference""" - pass - - -tracking = False -chatty_flag = 50 - - -from xml.dom import Node -try: - from xml.ns import XMLNS -except: - class XMLNS: - BASE = "http://www.w3.org/2000/xmlns/" - XML = "http://www.w3.org/XML/1998/namespace" - - -_attrs = lambda E: (E.attributes and E.attributes.values()) or [] -_children = lambda E: E.childNodes or [] -_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML -_inclusive = lambda n: n.unsuppressedPrefixes == None - -# Does a document/PI has lesser/greater document order than the -# first element? 
-_LesserElement, _Element, _GreaterElement = range(3) - -def _sorter(n1,n2): - '''_sorter(n1,n2) -> int - Sorting predicate for non-NS attributes.''' - - i = cmp(n1.namespaceURI, n2.namespaceURI) - if i: return i - return cmp(n1.localName, n2.localName) - - -def _sorter_ns(n1,n2): - '''_sorter_ns((n,v),(n,v)) -> int - "(an empty namespace URI is lexicographically least)."''' - - if n1[0] == 'xmlns': return -1 - if n2[0] == 'xmlns': return 1 - return cmp(n1[0], n2[0]) - -def _utilized(n, node, other_attrs, unsuppressedPrefixes): - '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean - Return true if that nodespace is utilized within the node''' - - if n.startswith('xmlns:'): - n = n[6:] - elif n.startswith('xmlns'): - n = n[5:] - if (n=="" and node.prefix in ["#default", None]) or \ - n == node.prefix or n in unsuppressedPrefixes: - return 1 - for attr in other_attrs: - if n == attr.prefix: return 1 - return 0 - -#_in_subset = lambda subset, node: not subset or node in subset -_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak - -class _implementation: - '''Implementation class for C14N. This accompanies a node during it's - processing and includes the parameters and processing state.''' - - # Handler for each node type; populated during module instantiation. - handlers = {} - - def __init__(self, node, write, **kw): - '''Create and run the implementation.''' - self.write = write - self.subset = kw.get('subset') - self.comments = kw.get('comments', 0) - self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes') - nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE }) - - # Processing state. 
- self.state = (nsdict, {'xml':''}, {}) #0422 - - if node.nodeType == Node.DOCUMENT_NODE: - self._do_document(node) - elif node.nodeType == Node.ELEMENT_NODE: - self.documentOrder = _Element # At document element - if not _inclusive(self): - self._do_element(node) - else: - inherited = self._inherit_context(node) - self._do_element(node, inherited) - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - pass - elif node.nodeType == Node.TEXT_NODE: - self._do_text(node) - else: - raise TypeError, str(node) - - - def _inherit_context(self, node): - '''_inherit_context(self, node) -> list - Scan ancestors of attribute and namespace context. Used only - for single element node canonicalization, not for subset - canonicalization.''' - - # Collect the initial list of xml:foo attributes. - xmlattrs = filter(_IN_XML_NS, _attrs(node)) - - # Walk up and get all xml:XXX attributes we inherit. - inherited, parent = [], node.parentNode - while parent and parent.nodeType == Node.ELEMENT_NODE: - for a in filter(_IN_XML_NS, _attrs(parent)): - n = a.localName - if n not in xmlattrs: - xmlattrs.append(n) - inherited.append(a) - parent = parent.parentNode - return inherited - - - def _do_document(self, node): - '''_do_document(self, node) -> None - Process a document node. 
documentOrder holds whether the document - element has been encountered such that PIs/comments can be written - as specified.''' - - self.documentOrder = _LesserElement - for child in node.childNodes: - if child.nodeType == Node.ELEMENT_NODE: - self.documentOrder = _Element # At document element - self._do_element(child) - self.documentOrder = _GreaterElement # After document element - elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE: - self._do_pi(child) - elif child.nodeType == Node.COMMENT_NODE: - self._do_comment(child) - elif child.nodeType == Node.DOCUMENT_TYPE_NODE: - pass - else: - raise TypeError, str(child) - handlers[Node.DOCUMENT_NODE] = _do_document - - - def _do_text(self, node): - '''_do_text(self, node) -> None - Process a text or CDATA node. Render various special characters - as their C14N entity representations.''' - if not _in_subset(self.subset, node): return - s = node.data.replace("&", "&") - s = s.replace("<", "<") - s = s.replace(">", ">") - s = s.replace("\015", "
") - if s: self.write(s) - handlers[Node.TEXT_NODE] = _do_text - handlers[Node.CDATA_SECTION_NODE] = _do_text - - - def _do_pi(self, node): - '''_do_pi(self, node) -> None - Process a PI node. Render a leading or trailing #xA if the - document order of the PI is greater or lesser (respectively) - than the document element. - ''' - if not _in_subset(self.subset, node): return - W = self.write - if self.documentOrder == _GreaterElement: W('\n') - W('<?') - W(node.nodeName) - s = node.data - if s: - W(' ') - W(s) - W('?>') - if self.documentOrder == _LesserElement: W('\n') - handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi - - - def _do_comment(self, node): - '''_do_comment(self, node) -> None - Process a comment node. Render a leading or trailing #xA if the - document order of the comment is greater or lesser (respectively) - than the document element. - ''' - if not _in_subset(self.subset, node): return - if self.comments: - W = self.write - if self.documentOrder == _GreaterElement: W('\n') - W('<!--') - W(node.data) - W('-->') - if self.documentOrder == _LesserElement: W('\n') - handlers[Node.COMMENT_NODE] = _do_comment - - - def _do_attr(self, n, value): - ''''_do_attr(self, node) -> None - Process an attribute.''' - - W = self.write - W(' ') - W(n) - W('="') - s = value.replace(value, "&", "&") - s = s.replace("<", "<") - s = s.replace('"', '"') - s = s.replace('\011', '	') - s = s.replace('\012', '
') - s = s.replace('\015', '
') - W(s) - W('"') - - - def _do_element(self, node, initial_other_attrs = []): - '''_do_element(self, node, initial_other_attrs = []) -> None - Process an element (and its children).''' - - # Get state (from the stack) make local copies. - # ns_parent -- NS declarations in parent - # ns_rendered -- NS nodes rendered by ancestors - # ns_local -- NS declarations relevant to this element - # xml_attrs -- Attributes in XML namespace from parent - # xml_attrs_local -- Local attributes in XML namespace. - ns_parent, ns_rendered, xml_attrs = \ - self.state[0], self.state[1].copy(), self.state[2].copy() #0422 - ns_local = ns_parent.copy() - xml_attrs_local = {} - - # progress("_do_element node.nodeName=", node.nodeName) - # progress("_do_element node.namespaceURI", node.namespaceURI) - # progress("_do_element node.tocml()", node.toxml()) - # Divide attributes into NS, XML, and others. - other_attrs = initial_other_attrs[:] - in_subset = _in_subset(self.subset, node) - for a in _attrs(node): - # progress("\t_do_element a.nodeName=", a.nodeName) - if a.namespaceURI == XMLNS.BASE: - n = a.nodeName - if n == "xmlns:": n = "xmlns" # DOM bug workaround - ns_local[n] = a.nodeValue - elif a.namespaceURI == XMLNS.XML: - if _inclusive(self) or in_subset: - xml_attrs_local[a.nodeName] = a #0426 - else: - other_attrs.append(a) - #add local xml:foo attributes to ancestor's xml:foo attributes - xml_attrs.update(xml_attrs_local) - - # Render the node - W, name = self.write, None - if in_subset: - name = node.nodeName - W('<') - W(name) - - # Create list of NS attributes to render. - ns_to_render = [] - for n,v in ns_local.items(): - - # If default namespace is XMLNS.BASE or empty, - # and if an ancestor was the same - if n == "xmlns" and v in [ XMLNS.BASE, '' ] \ - and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]: - continue - - # "omit namespace node with local name xml, which defines - # the xml prefix, if its string value is - # http://www.w3.org/XML/1998/namespace." 
- if n in ["xmlns:xml", "xml"] \ - and v in [ 'http://www.w3.org/XML/1998/namespace' ]: - continue - - - # If not previously rendered - # and it's inclusive or utilized - if (n,v) not in ns_rendered.items() \ - and (_inclusive(self) or \ - _utilized(n, node, other_attrs, self.unsuppressedPrefixes)): - ns_to_render.append((n, v)) - - # Sort and render the ns, marking what was rendered. - ns_to_render.sort(_sorter_ns) - for n,v in ns_to_render: - self._do_attr(n, v) - ns_rendered[n]=v #0417 - - # If exclusive or the parent is in the subset, add the local xml attributes - # Else, add all local and ancestor xml attributes - # Sort and render the attributes. - if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426 - other_attrs.extend(xml_attrs_local.values()) - else: - other_attrs.extend(xml_attrs.values()) - other_attrs.sort(_sorter) - for a in other_attrs: - self._do_attr(a.nodeName, a.value) - W('>') - - # Push state, recurse, pop state. - state, self.state = self.state, (ns_local, ns_rendered, xml_attrs) - for c in _children(node): - _implementation.handlers[c.nodeType](self, c) - self.state = state - - if name: W('</%s>' % name) - handlers[Node.ELEMENT_NODE] = _do_element - - -def Canonicalize(node, output=None, **kw): - '''Canonicalize(node, output=None, **kw) -> UTF-8 - - Canonicalize a DOM document/element node and all descendents. - Return the text; if output is specified then output.write will - be called to output the text and None will be returned - Keyword parameters: - nsdict -- a dictionary of prefix:uri namespace entries - assumed to exist in the surrounding context - comments -- keep comments if non-zero (default is 0) - subset -- Canonical XML subsetting resulting from XPath (default is []) - unsuppressedPrefixes -- do exclusive C14N, and this specifies the - prefixes that should be inherited. 
- ''' - if output: - apply(_implementation, (node, output.write), kw) - else: - s = StringIO.StringIO() - apply(_implementation, (node, s.write), kw) - return s.getvalue() - -# end of xmlC14n.py - -# from why import BecauseOfData, becauseSubexpression -def BecauseOfData(*args, **kargs): - # print args, kargs - pass -def becauseSubexpression(*args, **kargs): - # print args, kargs - pass - -N3_forSome_URI = forSomeSym -N3_forAll_URI = forAllSym - -# Magic resources we know about - - - -ADDED_HASH = "#" # Stop where we use this in case we want to remove it! -# This is the hash on namespace URIs - -RDF_type = ( SYMBOL , RDF_type_URI ) -DAML_sameAs = ( SYMBOL, DAML_sameAs_URI ) - -LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" - -BOOLEAN_DATATYPE = _XSD_PFX + "boolean" -DECIMAL_DATATYPE = _XSD_PFX + "decimal" -DOUBLE_DATATYPE = _XSD_PFX + "double" -FLOAT_DATATYPE = _XSD_PFX + "float" -INTEGER_DATATYPE = _XSD_PFX + "integer" - -option_noregen = 0 # If set, do not regenerate genids on output - -# @@ I18n - the notname chars need extending for well known unicode non-text -# characters. The XML spec switched to assuming unknown things were name -# characaters. -# _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ -_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ -_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' - - -N3CommentCharacter = "#" # For unix script #! compatabilty - -########################################## Parse string to sink -# -# Regular expressions: -eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment -eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. 
w/comment -ws = re.compile(r'[ \t]*') # Whitespace not including NL -signed_integer = re.compile(r'[-+]?[0-9]+') # integer -number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?') -digitstring = re.compile(r'[0-9]+') # Unsigned integer -interesting = re.compile(r'[\\\r\n\"]') -langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?') -#" - - - -class SinkParser: - def __init__(self, store, openFormula=None, thisDoc="", baseURI=None, - genPrefix = "", flags="", - why=None): - """ note: namespace names should *not* end in #; - the # will get added during qname processing """ - - self._bindings = {} - self._flags = flags - if thisDoc != "": - assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc - self._bindings[""] = thisDoc + "#" # default - - self._store = store - if genPrefix: store.setGenPrefix(genPrefix) # pass it on - - self._thisDoc = thisDoc - self.lines = 0 # for error handling - self.startOfLine = 0 # For calculating character number - self._genPrefix = genPrefix - self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ] - self.keywordsSet = 0 # Then only can others be considerd qnames - self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term - self._variables = {} - self._parentVariables = {} - self._reason = why # Why the parser was asked to parse this - - self._reason2 = None # Why these triples - # was: diag.tracking - if tracking: self._reason2 = BecauseOfData( - store.newSymbol(thisDoc), because=self._reason) - - if baseURI: self._baseURI = baseURI - else: - if thisDoc: - self._baseURI = thisDoc - else: - self._baseURI = None - - assert not self._baseURI or ':' in self._baseURI - - if not self._genPrefix: - if self._thisDoc: self._genPrefix = self._thisDoc + "#_g" - else: self._genPrefix = uniqueURI() - - if openFormula ==None: - if self._thisDoc: - self._formula = store.newFormula(thisDoc + "#_formula") - else: - self._formula = store.newFormula() - else: - 
self._formula = openFormula - - - self._context = self._formula - self._parentContext = None - - - def here(self, i): - """String generated from position in file - - This is for repeatability when refering people to bnodes in a document. - This has diagnostic uses less formally, as it should point one to which - bnode the arbitrary identifier actually is. It gives the - line and character number of the '[' charcacter or path character - which introduced the blank node. The first blank node is boringly _L1C1. - It used to be used only for tracking, but for tests in general - it makes the canonical ordering of bnodes repeatable.""" - - return "%s_L%iC%i" % (self._genPrefix , self.lines, - i - self.startOfLine + 1) - - def formula(self): - return self._formula - - def loadStream(self, stream): - return self.loadBuf(stream.read()) # Not ideal - - def loadBuf(self, buf): - """Parses a buffer and returns its top level formula""" - self.startDoc() - - self.feed(buf) - return self.endDoc() # self._formula - - - def feed(self, octets): - """Feed an octet stream tothe parser - - if BadSyntax is raised, the string - passed in the exception object is the - remainder after any statements have been parsed. 
- So if there is more data to feed to the - parser, it should be straightforward to recover.""" - - if not isinstance(octets, unicode): - s = octets.decode('utf-8') - # NB already decoded, so \ufeff - if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'): - s = s[1:] - else: - s=octets - - i = 0 - while i >= 0: - j = self.skipSpace(s, i) - if j<0: return - - i = self.directiveOrStatement(s,j) - if i<0: - print "# next char: ", `s[j]` - raise BadSyntax(self._thisDoc, self.lines, s, j, - "expected directive or statement") - - def directiveOrStatement(self, str,h): - - i = self.skipSpace(str, h) - if i<0: return i # EOF - - j = self.directive(str, i) - if j>=0: return self.checkDot(str,j) - - j = self.statement(str, i) - if j>=0: return self.checkDot(str,j) - - return j - - - #@@I18N - global _notNameChars - #_namechars = string.lowercase + string.uppercase + string.digits + '_-' - - def tok(self, tok, str, i): - """Check for keyword. Space must have been stripped on entry and - we must not be at end of file.""" - - assert tok[0] not in _notNameChars # not for punctuation - if str[i:i+1] == "@": - i = i+1 - else: - if tok not in self.keywords: - return -1 # No, this has neither keywords declaration nor "@" - - if (str[i:i+len(tok)] == tok - and (str[i+len(tok)] in _notQNameChars )): - i = i + len(tok) - return i - else: - return -1 - - def directive(self, str, i): - j = self.skipSpace(str, i) - if j<0: return j # eof - res = [] - - j = self.tok('bind', str, i) # implied "#". Obsolete. 
- if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "keyword bind is obsolete: use @prefix") - - j = self.tok('keywords', str, i) - if j>0: - i = self.commaSeparatedList(str, j, res, self.bareWord) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "'@keywords' needs comma separated list of words") - self.setKeywords(res[:]) - # was: diag.chatty_flag - if chatty_flag > 80: progress("Keywords ", self.keywords) - return i - - - j = self.tok('forAll', str, i) - if j > 0: - i = self.commaSeparatedList(str, j, res, self.uri_ref2) - if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "Bad variable list after @forAll") - for x in res: - #self._context.declareUniversal(x) - if x not in self._variables or x in self._parentVariables: - self._variables[x] = self._context.newUniversal(x) - return i - - j = self.tok('forSome', str, i) - if j > 0: - i = self. commaSeparatedList(str, j, res, self.uri_ref2) - if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "Bad variable list after @forSome") - for x in res: - self._context.declareExistential(x) - return i - - - j=self.tok('prefix', str, i) # no implied "#" - if j>=0: - t = [] - i = self.qname(str, j, t) - if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "expected qname after @prefix") - j = self.uri_ref2(str, i, t) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "expected <uriref> after @prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - raise BadSyntax(self._thisDoc, self.lines, str, j, - "With no base URI, cannot use relative URI in @prefix <"+ns+">") - assert ':' in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j=self.tok('base', str, i) # Added 2007/7/7 - if j >= 0: - t = [] - i = self.uri_ref2(str, j, t) - if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "expected <uri> after @base ") - ns = self.uriOf(t[0]) - - if 
self._baseURI: - ns = join(self._baseURI, ns) - else: - raise BadSyntax(self._thisDoc, self.lines, str, j, - "With no previous base URI, cannot use relative URI in @base <"+ns+">") - assert ':' in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. - - def bind(self, qn, uri): - assert isinstance(uri, - types.StringType), "Any unicode must be %x-encoded already" - if qn == "": - self._store.setDefaultNamespace(uri) - else: - self._store.bind(qn, uri) - - def setKeywords(self, k): - "Takes a list of strings" - if k == None: - self.keywordsSet = 0 - else: - self.keywords = k - self.keywordsSet = 1 - - - def startDoc(self): - # was: self._store.startDoc() - self._store.startDoc(self._formula) - - def endDoc(self): - """Signal end of document and stop parsing. returns formula""" - self._store.endDoc(self._formula) # don't canonicalize yet - return self._formula - - def makeStatement(self, quadruple): - #$$$$$$$$$$$$$$$$$$$$$ -# print "# Parser output: ", `quadruple` - self._store.makeStatement(quadruple, why=self._reason2) - - - - def statement(self, str, i): - r = [] - - i = self.object(str, i, r) # Allow literal for subject - extends RDF - if i<0: return i - - j = self.property_list(str, i, r[0]) - - if j<0: raise BadSyntax(self._thisDoc, self.lines, - str, i, "expected propertylist") - return j - - def subject(self, str, i, res): - return self.item(str, i, res) - - def verb(self, str, i, res): - """ has _prop_ - is _prop_ of - a - = - _prop_ - >- prop -> - <- prop -< - _operator_""" - - j = self.skipSpace(str, i) - if j<0:return j # eof - - r = [] - - j = self.tok('has', str, i) - if j>=0: - i = self.prop(str, j, r) - if i < 0: raise BadSyntax(self._thisDoc, self.lines, - str, j, "expected property after 'has'") - res.append(('->', r[0])) - return i - - j = self.tok('is', str, i) - if j>=0: - i = self.prop(str, j, r) - if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "expected <property> after 
'is'") - j = self.skipSpace(str, i) - if j<0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "End of file found, expected property after 'is'") - return j # eof - i=j - j = self.tok('of', str, i) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "expected 'of' after 'is' <prop>") - res.append(('<-', r[0])) - return j - - j = self.tok('a', str, i) - if j>=0: - res.append(('->', RDF_type)) - return j - - - if str[i:i+2] == "<=": - res.append(('<-', self._store.newSymbol(Logic_NS+"implies"))) - return i+2 - - if str[i:i+1] == "=": - if str[i+1:i+2] == ">": - res.append(('->', self._store.newSymbol(Logic_NS+"implies"))) - return i+2 - res.append(('->', DAML_sameAs)) - return i+1 - - if str[i:i+2] == ":=": - # patch file relates two formulae, uses this @@ really? - res.append(('->', Logic_NS+"becomes")) - return i+2 - - j = self.prop(str, i, r) - if j >= 0: - res.append(('->', r[0])) - return j - - if str[i:i+2] == ">-" or str[i:i+2] == "<-": - raise BadSyntax(self._thisDoc, self.lines, str, j, - ">- ... -> syntax is obsolete.") - - return -1 - - def prop(self, str, i, res): - return self.item(str, i, res) - - def item(self, str, i, res): - return self.path(str, i, res) - - def blankNode(self, uri=None): - if "B" not in self._flags: - return self._context.newBlankNode(uri, why=self._reason2) - x = self._context.newSymbol(uri) - self._context.declareExistential(x) - return x - - def path(self, str, i, res): - """Parse the path production. - """ - j = self.nodeOrLiteral(str, i, res) - if j<0: return j # nope - - while str[j:j+1] in "!^.": # no spaces, must follow exactly (?) - ch = str[j:j+1] # @@ Allow "." followed IMMEDIATELY by a node. 
- if ch == ".": - ahead = str[j+1:j+2] - if not ahead or (ahead in _notNameChars - and ahead not in ":?<[{("): break - subj = res.pop() - obj = self.blankNode(uri=self.here(j)) - j = self.node(str, j+1, res) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "EOF found in middle of path syntax") - pred = res.pop() - if ch == "^": # Reverse traverse - self.makeStatement((self._context, pred, obj, subj)) - else: - self.makeStatement((self._context, pred, subj, obj)) - res.append(obj) - return j - - def anonymousNode(self, ln): - """Remember or generate a term for one of these _: anonymous nodes""" - term = self._anonymousNodes.get(ln, None) - if term != None: return term - term = self._store.newBlankNode(self._context, why=self._reason2) - self._anonymousNodes[ln] = term - return term - - def node(self, str, i, res, subjectAlready=None): - """Parse the <node> production. - Space is now skipped once at the beginning - instead of in multipe calls to self.skipSpace(). - """ - subj = subjectAlready - - j = self.skipSpace(str,i) - if j<0: return j #eof - i=j - ch = str[i:i+1] # Quick 1-character checks first: - - if ch == "[": - bnodeID = self.here(i) - j=self.skipSpace(str,i+1) - if j<0: raise BadSyntax(self._thisDoc, - self.lines, str, i, "EOF after '['") - if str[j:j+1] == "=": # Hack for "is" binding name to anon node - i = j+1 - objs = [] - j = self.objectList(str, i, objs); - if j>=0: - subj = objs[0] - if len(objs)>1: - for obj in objs: - self.makeStatement((self._context, - DAML_sameAs, subj, obj)) - j = self.skipSpace(str, j) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "EOF when objectList expected after [ = ") - if str[j:j+1] == ";": - j=j+1 - else: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "objectList expected after [= ") - - if subj is None: - subj=self.blankNode(uri= bnodeID) - - i = self.property_list(str, j, subj) - if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "property_list expected") - - j = 
self.skipSpace(str, i) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "EOF when ']' expected after [ <propertyList>") - if str[j:j+1] != "]": - raise BadSyntax(self._thisDoc, - self.lines, str, j, "']' expected") - res.append(subj) - return j+1 - - if ch == "{": - ch2 = str[i+1:i+2] - if ch2 == '$': - i += 1 - j = i + 1 - List = [] - first_run = True - while 1: - i = self.skipSpace(str, j) - if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "needed '$}', found end.") - if str[i:i+2] == '$}': - j = i+2 - break - - if not first_run: - if str[i:i+1] == ',': - i+=1 - else: - raise BadSyntax(self._thisDoc, self.lines, - str, i, "expected: ','") - else: first_run = False - - item = [] - j = self.item(str,i, item) #@@@@@ should be path, was object - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "expected item in set or '$}'") - List.append(self._store.intern(item[0])) - res.append(self._store.newSet(List, self._context)) - return j - else: - j=i+1 - oldParentContext = self._parentContext - self._parentContext = self._context - parentAnonymousNodes = self._anonymousNodes - grandParentVariables = self._parentVariables - self._parentVariables = self._variables - self._anonymousNodes = {} - self._variables = self._variables.copy() - reason2 = self._reason2 - self._reason2 = becauseSubexpression - if subj is None: subj = self._store.newFormula() - self._context = subj - - while 1: - i = self.skipSpace(str, j) - if i<0: raise BadSyntax(self._thisDoc, self.lines, - str, i, "needed '}', found end.") - - if str[i:i+1] == "}": - j = i+1 - break - - j = self.directiveOrStatement(str,i) - if j<0: raise BadSyntax(self._thisDoc, self.lines, - str, i, "expected statement or '}'") - - self._anonymousNodes = parentAnonymousNodes - self._variables = self._parentVariables - self._parentVariables = grandParentVariables - self._context = self._parentContext - self._reason2 = reason2 - self._parentContext = oldParentContext - res.append(subj.close()) # 
No use until closed - return j - - if ch == "(": - thing_type = self._store.newList - ch2 = str[i+1:i+2] - if ch2 == '$': - thing_type = self._store.newSet - i += 1 - j=i+1 - - List = [] - while 1: - i = self.skipSpace(str, j) - if i<0: raise BadSyntax(self._thisDoc, self.lines, - str, i, "needed ')', found end.") - if str[i:i+1] == ')': - j = i+1 - break - - item = [] - j = self.item(str,i, item) #@@@@@ should be path, was object - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "expected item in list or ')'") - List.append(self._store.intern(item[0])) - res.append(thing_type(List, self._context)) - return j - - j = self.tok('this', str, i) # This context - if j>=0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.") - res.append(self._context) - return j - - #booleans - j = self.tok('true', str, i) - if j>=0: - res.append(True) - return j - j = self.tok('false', str, i) - if j>=0: - res.append(False) - return j - - if subj is None: # If this can be a named node, then check for a name. 
- j = self.uri_ref2(str, i, res) - if j >= 0: - return j - - return -1 - - def property_list(self, str, i, subj): - """Parse property list - Leaves the terminating punctuation in the buffer - """ - while 1: - j = self.skipSpace(str, i) - if j<0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "EOF found when expected verb in property list") - return j #eof - - if str[j:j+2] ==":-": - i = j + 2 - res = [] - j = self.node(str, i, res, subj) - if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i, - "bad {} or () or [] node after :- ") - i=j - continue - i=j - v = [] - j = self.verb(str, i, v) - if j<=0: - return i # void but valid - - objs = [] - i = self.objectList(str, j, objs) - if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j, - "objectList expected") - for obj in objs: - dir, sym = v[0] - if dir == '->': - self.makeStatement((self._context, sym, subj, obj)) - else: - self.makeStatement((self._context, sym, obj, subj)) - - j = self.skipSpace(str, i) - if j<0: - raise BadSyntax(self._thisDoc, self.lines, str, j, - "EOF found in list of objects") - return j #eof - if str[i:i+1] != ";": - return i - i = i+1 # skip semicolon and continue - - def commaSeparatedList(self, str, j, res, what): - """return value: -1 bad syntax; >1 new position in str - res has things found appended - """ - i = self.skipSpace(str, j) - if i<0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "EOF found expecting comma sep list") - return i - if str[i] == ".": return j # empty list is OK - i = what(str, i, res) - if i<0: return -1 - - while 1: - j = self.skipSpace(str, i) - if j<0: return j # eof - ch = str[j:j+1] - if ch != ",": - if ch != ".": - return -1 - return j # Found but not swallowed "." 
- i = what(str, j+1, res) - if i<0: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "bad list content") - return i - - def objectList(self, str, i, res): - i = self.object(str, i, res) - if i<0: return -1 - while 1: - j = self.skipSpace(str, i) - if j<0: - raise BadSyntax(self._thisDoc, self.lines, str, j, - "EOF found after object") - return j #eof - if str[j:j+1] != ",": - return j # Found something else! - i = self.object(str, j+1, res) - if i<0: return i - - def checkDot(self, str, i): - j = self.skipSpace(str, i) - if j<0: return j #eof - if str[j:j+1] == ".": - return j+1 # skip - if str[j:j+1] == "}": - return j # don't skip it - if str[j:j+1] == "]": - return j - raise BadSyntax(self._thisDoc, self.lines, - str, j, "expected '.' or '}' or ']' at end of statement") - return i - - - def uri_ref2(self, str, i, res): - """Generate uri from n3 representation. - - Note that the RDF convention of directly concatenating - NS and local name is now used though I prefer inserting a '#' - to make the namesapces look more like what XML folks expect. - """ - qn = [] - j = self.qname(str, i, qn) - if j>=0: - pfx, ln = qn[0] - if pfx is None: - assert 0, "not used?" - ns = self._baseURI + ADDED_HASH - else: - try: - ns = self._bindings[pfx] - except KeyError: - if pfx == "_": # Magic prefix 2001/05/30, can be overridden - res.append(self.anonymousNode(ln)) - return j - raise BadSyntax(self._thisDoc, self.lines, str, i, - "Prefix \"%s:\" not bound" % (pfx)) - symb = self._store.newSymbol(ns + ln) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) # @@@ "#" CONVENTION - if not ns.find("#"):progress( - "Warning: no # on namespace %s," % ns) - return j - - - i = self.skipSpace(str, i) - if i<0: return -1 - - if str[i] == "?": - v = [] - j = self.variable(str,i,v) - if j>0: #Forget varibles as a class, only in context. 
- res.append(v[0]) - return j - return -1 - - elif str[i]=="<": - i = i + 1 - st = i - while i < len(str): - if str[i] == ">": - uref = str[st:i] # the join should dealt with "": - if self._baseURI: - uref = join(self._baseURI, uref) # was: uripath.join - else: - assert ":" in uref, \ - "With no base URI, cannot deal with relative URIs" - if str[i-1:i]=="#" and not uref[-1:]=="#": - uref = uref + "#" # She meant it! Weirdness in urlparse? - symb = self._store.newSymbol(uref) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) - return i+1 - i = i + 1 - raise BadSyntax(self._thisDoc, self.lines, str, j, - "unterminated URI reference") - - elif self.keywordsSet: - v = [] - j = self.bareWord(str,i,v) - if j<0: return -1 #Forget varibles as a class, only in context. - if v[0] in self.keywords: - raise BadSyntax(self._thisDoc, self.lines, str, i, - 'Keyword "%s" not allowed here.' % v[0]) - res.append(self._store.newSymbol(self._bindings[""]+v[0])) - return j - else: - return -1 - - def skipSpace(self, str, i): - """Skip white space, newlines and comments. 
- return -1 if EOF, else position of first non-ws character""" - while 1: - m = eol.match(str, i) - if m == None: break - self.lines = self.lines + 1 - i = m.end() # Point to first character unmatched - self.startOfLine = i - m = ws.match(str, i) - if m != None: - i = m.end() - m = eof.match(str, i) - if m != None: return -1 - return i - - def variable(self, str, i, res): - """ ?abc -> variable(:abc) - """ - - j = self.skipSpace(str, i) - if j<0: return -1 - - if str[j:j+1] != "?": return -1 - j=j+1 - i = j - if str[j] in "0123456789-": - raise BadSyntax(self._thisDoc, self.lines, str, j, - "Varible name can't start with '%s'" % str[j]) - return -1 - while i <len(str) and str[i] not in _notNameChars: - i = i+1 - if self._parentContext == None: - varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i]) - if varURI not in self._variables: - self._variables[varURI] = self._context.newUniversal(varURI - , why=self._reason2) - res.append(self._variables[varURI]) - return i - # @@ was: - # raise BadSyntax(self._thisDoc, self.lines, str, j, - # "Can't use ?xxx syntax for variable in outermost level: %s" - # % str[j-1:i]) - varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i]) - if varURI not in self._parentVariables: - self._parentVariables[varURI] = self._parentContext.newUniversal(varURI - , why=self._reason2) - res.append(self._parentVariables[varURI]) - return i - - def bareWord(self, str, i, res): - """ abc -> :abc - """ - j = self.skipSpace(str, i) - if j<0: return -1 - - if str[j] in "0123456789-" or str[j] in _notNameChars: return -1 - i = j - while i <len(str) and str[i] not in _notNameChars: - i = i+1 - res.append(str[j:i]) - return i - - def qname(self, str, i, res): - """ - xyz:def -> ('xyz', 'def') - If not in keywords and keywordsSet: def -> ('', 'def') - :def -> ('', 'def') - """ - - i = self.skipSpace(str, i) - if i<0: return -1 - - c = str[i] - if c in "0123456789-+": return -1 - if c not in _notNameChars: - ln = c - i = i + 1 - while i < 
len(str): - c = str[i] - if c not in _notNameChars: - ln = ln + c - i = i + 1 - else: break - else: # First character is non-alpha - ln = '' # Was: None - TBL (why? useful?) - - if i<len(str) and str[i] == ':': - pfx = ln - i = i + 1 - ln = '' - while i < len(str): - c = str[i] - if c not in _notNameChars: - ln = ln + c - i = i + 1 - else: break - - res.append((pfx, ln)) - return i - - else: # delimiter was not ":" - if ln and self.keywordsSet and ln not in self.keywords: - res.append(('', ln)) - return i - return -1 - - def object(self, str, i, res): - j = self.subject(str, i, res) - if j>= 0: - return j - else: - j = self.skipSpace(str, i) - if j<0: return -1 - else: i=j - - if str[i]=='"': - if str[i:i+3] == '"""': delim = '"""' - else: delim = '"' - i = i + len(delim) - - j, s = self.strconst(str, i, delim) - - res.append(self._store.newLiteral(s)) - progress("New string const ", s, j) - return j - else: - return -1 - - def nodeOrLiteral(self, str, i, res): - j = self.node(str, i, res) - startline = self.lines # Remember where for error messages - if j>= 0: - return j - else: - j = self.skipSpace(str, i) - if j<0: return -1 - else: i=j - - ch = str[i] - if ch in "-+0987654321": - m = number_syntax.match(str, i) - if m == None: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "Bad number syntax") - j = m.end() - if m.group('exponent') != None: # includes decimal exponent - res.append(float(str[i:j])) -# res.append(self._store.newLiteral(str[i:j], -# self._store.newSymbol(FLOAT_DATATYPE))) - elif m.group('decimal') != None: - res.append(Decimal(str[i:j])) - else: - res.append(long(str[i:j])) -# res.append(self._store.newLiteral(str[i:j], -# self._store.newSymbol(INTEGER_DATATYPE))) - return j - - if str[i]=='"': - if str[i:i+3] == '"""': delim = '"""' - else: delim = '"' - i = i + len(delim) - - dt = None - j, s = self.strconst(str, i, delim) - lang = None - if str[j:j+1] == "@": # Language? 
- m = langcode.match(str, j+1) - if m == None: - raise BadSyntax(self._thisDoc, startline, str, i, - "Bad language code syntax on string literal, after @") - i = m.end() - lang = str[j+1:i] - j = i - if str[j:j+2] == "^^": - res2 = [] - j = self.uri_ref2(str, j+2, res2) # Read datatype URI - dt = res2[0] -# if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": - if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": - try: - dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">' - + s - + '</rdf:envelope>').firstChild - except: - raise ValueError('s="%s"' % s) - res.append(self._store.newXMLLiteral(dom)) - return j - res.append(self._store.newLiteral(s, dt, lang)) - return j - else: - return -1 - - def uriOf(self, sym): - if isinstance(sym, types.TupleType): - return sym[1] # old system for --pipe - # return sym.uriref() # cwm api - return sym - - - def strconst(self, str, i, delim): - """parse an N3 string constant delimited by delim. - return index, val - """ - - - j = i - ustr = u"" # Empty unicode string - startline = self.lines # Remember where for error messages - while j<len(str): - if str[j] == '"': - if delim == '"': # done when delim is " - i = j + 1 - return i, ustr - if delim == '"""': # done when delim is """ and ... - if str[j:j+5] == '"""""': # ... we have "" before - i = j + 5 - ustr = ustr + '""' - return i, ustr - if str[j:j+4] == '""""': # ... we have " before - i = j + 4 - ustr = ustr + '"' - return i, ustr - if str[j:j+3] == '"""': # ... current " is part of delim - i = j + 3 - return i, ustr - - # we are inside of the string and current char is " - j = j + 1 - ustr = ustr + '"' - continue - - m = interesting.search(str, j) # was str[j:]. - # Note for pos param to work, MUST be compiled ... re bug? 
- assert m , "Quote expected in string at ^ in %s^%s" %( - str[j-20:j], str[j:j+20]) # we at least have to find a quote - - i = m.start() - try: - ustr = ustr + str[j:i] - except UnicodeError: - err = "" - for c in str[j:i]: - err = err + (" %02x" % ord(c)) - streason = sys.exc_info()[1].__str__() - raise BadSyntax(self._thisDoc, startline, str, j, - "Unicode error appending characters %s to string, because\n\t%s" - % (err, streason)) - -# print "@@@ i = ",i, " j=",j, "m.end=", m.end() - - ch = str[i] - if ch == '"': - j = i - continue - elif ch == "\r": # Strip carriage returns - j = i+1 - continue - elif ch == "\n": - if delim == '"': - raise BadSyntax(self._thisDoc, startline, str, i, - "newline found in string literal") - self.lines = self.lines + 1 - ustr = ustr + ch - j = i + 1 - self.startOfLine = j - - elif ch == "\\": - j = i + 1 - ch = str[j:j+1] # Will be empty if string ends - if not ch: - raise BadSyntax(self._thisDoc, startline, str, i, - "unterminated string literal (2)") - k = 'abfrtvn\\"'.find(ch) - if k >= 0: - uch = '\a\b\f\r\t\v\n\\"'[k] - ustr = ustr + uch - j = j + 1 - elif ch == "u": - j, ch = self.uEscape(str, j+1, startline) - ustr = ustr + ch - elif ch == "U": - j, ch = self.UEscape(str, j+1, startline) - ustr = ustr + ch - else: - raise BadSyntax(self._thisDoc, self.lines, str, i, - "bad escape") - - raise BadSyntax(self._thisDoc, self.lines, str, i, - "unterminated string literal") - - - def uEscape(self, str, i, startline): - j = i - count = 0 - value = 0 - while count < 4: # Get 4 more characters - ch = str[j:j+1].lower() - # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 - j = j + 1 - if ch == "": - raise BadSyntax(self._thisDoc, startline, str, i, - "unterminated string literal(3)") - k = "0123456789abcdef".find(ch) - if k < 0: - raise BadSyntax(self._thisDoc, startline, str, i, - "bad string literal hex escape") - value = value * 16 + k - count = count + 1 - uch = unichr(value) - return j, uch - - def UEscape(self, str, i, 
startline): - stringType = type('') - j = i - count = 0 - value = '\\U' - while count < 8: # Get 8 more characters - ch = str[j:j+1].lower() - # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 - j = j + 1 - if ch == "": - raise BadSyntax(self._thisDoc, startline, str, i, - "unterminated string literal(3)") - k = "0123456789abcdef".find(ch) - if k < 0: - raise BadSyntax(self._thisDoc, startline, str, i, - "bad string literal hex escape") - value = value + ch - count = count + 1 - - uch = stringType(value).decode('unicode-escape') - return j, uch - -wide_build = True -try: - unichr(0x10000) -except ValueError: - wide_build = False - -# If we are going to do operators then they should generate -# [ is operator:plus of ( \1 \2 ) ] - - -class BadSyntax(SyntaxError): - def __init__(self, uri, lines, str, i, why): - self._str = str.encode('utf-8') # Better go back to strings for errors - self._i = i - self._why = why - self.lines = lines - self._uri = uri - - def __str__(self): - str = self._str - i = self._i - st = 0 - if i>60: - pre="..." - st = i - 60 - else: pre="" - if len(str)-i > 60: post="..." 
- else: post="" - - return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \ - % (self.lines +1, self._uri, self._why, pre, - str[st:i], str[i:i+60], post) - - - -def stripCR(str): - res = "" - for ch in str: - if ch != "\r": - res = res + ch - return res - -def dummyWrite(x): - pass - -################################################################################ - - -def toBool(s): - if s == 'true' or s == 'True' or s == '1': - return True - if s == 'false' or s == 'False' or s == '0': - return False - raise ValueError(s) - - - - - -class Formula(object): - number = 0 - - def __init__(self, parent): - self.counter = 0 - Formula.number += 1 - self.number = Formula.number - self.existentials = {} - self.universals = {} - - self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id()) - - def __str__(self): - return '_:Formula%s' % self.number - - def id(self): - return BNode('_:Formula%s' % self.number) - - def newBlankNode(self, uri=None, why=None): - if uri is None: - self.counter += 1 - b = BNode('f%sb%s' % (id(self), self.counter)) - else: b = BNode(uri.split('#').pop().replace('_', 'b')) - return b - - def newUniversal(self, uri, why=None): - return Variable(uri.split('#').pop()) - - def declareExistential(self, x): - self.existentials[x] = self.newBlankNode() - - def close(self): - - return self.quotedgraph - -r_hibyte = re.compile(r'([\x80-\xff])') -def iri(uri): - return uri.decode('utf-8') - # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri)) - -class RDFSink(object): - def __init__(self, graph): - self.rootFormula = None - self.counter = 0 - self.graph=graph - - - def newFormula(self): - assert self.graph.store.formula_aware - f = Formula(self.graph) - return f - - def newSymbol(self, *args): - uri = args[0].encode('utf-8') - return URIRef(iri(uri)) - - def newBlankNode(self, arg=None, **kargs): - if isinstance(arg, Formula): - return arg.newBlankNode() - elif arg is None: - self.counter += 1 - b = BNode('n' + 
str(self.counter)) - else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b')) - return b - - def newLiteral(self, s, dt, lang): - if dt: return Literal(s, datatype=dt) - else: return Literal(s, lang=lang) - - def newList(self, n, f): - if not n: - return self.newSymbol( - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil' - ) - - a = self.newBlankNode(f) - first = self.newSymbol( - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first' - ) - rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest') - self.makeStatement((f, first, a, n[0])) - self.makeStatement((f, rest, a, self.newList(n[1:], f))) - return a - - def newSet(self, *args): - return set(args) - - def setDefaultNamespace(self, *args): - return ':'.join(repr(n) for n in args) - - def makeStatement(self, quadruple, why=None): - f, p, s, o = quadruple - - if hasattr(p, 'formula'): - raise Exception("Formula used as predicate") - - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) - - - if f == self.rootFormula: - # print s, p, o, '.' 
- self.graph.add((s, p, o)) - else: - f.quotedgraph.add((s,p,o)) - - - #return str(quadruple) - - def normalise(self, f, n): - if isinstance(n, tuple): - return URIRef(unicode(n[1])) - - # if isinstance(n, list): - # rdflist, f = n - # name = self.newBlankNode() - # if f == self.rootFormula: - # sublist = name - # for i in xrange(0, len(rdflist) - 1): - # print sublist, 'first', rdflist[i] - # rest = self.newBlankNode() - # print sublist, 'rest', rest - # sublist = rest - # print sublist, 'first', rdflist[-1] - # print sublist, 'rest', 'nil' - # return name - - if isinstance(n, bool): - s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) - return s - - if isinstance(n, int) or isinstance(n, long): - s = Literal(unicode(n), datatype=INTEGER_DATATYPE) - return s - - if isinstance(n, Decimal): - value = str(n.normalize()) - if value == '-0': - value = '0' - s = Literal(value, datatype=DECIMAL_DATATYPE ) - return s - - if isinstance(n, float): - s = Literal(str(n), datatype=DOUBLE_DATATYPE ) - return s - - if f.existentials.has_key(n): - return f.existentials[n] - - # if isinstance(n, Var): - # if f.universals.has_key(n): - # return f.universals[n] - # f.universals[n] = f.newBlankNode() - # return f.universals[n] - - return n - - def intern(self, something): - return something - - def bind(self, pfx, uri): - pass # print pfx, ':', uri - - def startDoc(self, formula): - self.rootFormula = formula - - def endDoc(self, formula): - pass - - -################################################### -# -# Utilities -# - -Escapes = {'a': '\a', - 'b': '\b', - 'f': '\f', - 'r': '\r', - 't': '\t', - 'v': '\v', - 'n': '\n', - '\\': '\\', - '"': '"'} - -forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]') -forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]') -#" -def stringToN3(str, singleLine=0, flags=""): - res = '' - if (len(str) > 20 and - str[-1] <> '"' and - not singleLine and - (str.find("\n") >=0 - or str.find('"') >=0)): - delim= '"""' - forbidden = 
forbidden1 # (allow tabs too now) - else: - delim = '"' - forbidden = forbidden2 - - i = 0 - - while i < len(str): - m = forbidden.search(str, i) - if not m: - break - - j = m.start() - res = res + str[i:j] - ch = m.group(0) - if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #" - res = res + ch - else: - k = '\a\b\f\r\t\v\n\\"'.find(ch) - if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k] - else: - if 'e' in flags: -# res = res + ('\\u%04x' % ord(ch)) - res = res + ('\\u%04X' % ord(ch)) - # http://www.w3.org/TR/rdf-testcases/#ntriples - else: - res = res + ch - i = j + 1 - - # The following code fixes things for really high range Unicode - newstr = "" - for ch in res + str[i:]: - if ord(ch)>65535: - newstr = newstr + ('\\U%08X' % ord(ch)) - # http://www.w3.org/TR/rdf-testcases/#ntriples - else: - newstr = newstr + ch - # - - return delim + newstr + delim - -def backslashUify(ustr): - """Use URL encoding to return an ASCII string corresponding - to the given unicode""" -# progress("String is "+`ustr`) -# s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 65535: - ch = "\\U%08X" % ord(ch) - elif ord(ch) > 126: - ch = "\\u%04X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return b(s) - -@py3compat.format_doctest_out -def hexify(ustr): - """Use URL encoding to return an ASCII string - corresponding to the given UTF8 string - - >>> hexify("http://example/a b") - %(b)s'http://example/a%%20b' - - """ #" -# progress("String is "+`ustr`) -# s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 126 or ord(ch) < 33 : - ch = "%%%02X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return b(s) - -def dummy(): - res = "" - if len(str) > 20 and (str.find("\n") >=0 - or str.find('"') >=0): - delim= '"""' - forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now) - else: - delim = '"' - forbidden = "\\\"\a\b\f\r\v\t\n" - for i in range(len(str)): - ch = str[i] - j = forbidden.find(ch) - if ch == '"' 
and delim == '"""' \ - and i+1 < len(str) and str[i+1] != '"': - j=-1 # Single quotes don't need escaping in long format - if j>=0: ch = "\\" + '\\"abfrvtn'[j] - elif ch not in "\n\t" and (ch < " " or ch > "}"): - ch = "[[" + `ch` + "]]" #[2:-1] # Use python - res = res + ch - return delim + res + delim - - -class N3Parser(Parser): - - def __init__(self): - pass - - def parse(self, source, graph, encoding="utf-8"): - # we're currently being handed a Graph, not a ConjunctiveGraph - assert graph.store.context_aware # is this implied by formula_aware - assert graph.store.formula_aware - - if encoding not in [None, "utf-8"]: - raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding) - - conj_graph = ConjunctiveGraph(store=graph.store) - conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg - # TODO: update N3Processor so that it can use conj_graph as the sink - conj_graph.namespace_manager = graph.namespace_manager - sink = RDFSink(conj_graph) - - baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") - p = SinkParser(sink, baseURI=baseURI) - - p.loadStream(source.getByteStream()) - - for prefix, namespace in p._bindings.items(): - conj_graph.bind(prefix, namespace) - - - - -def _test(): - import doctest - doctest.testmod() - - -# if __name__ == '__main__': -# _test() - -def main(): - g=ConjunctiveGraph() - - sink = RDFSink(g) - base = 'file://' + os.path.join(os.getcwd(), sys.argv[1]) - - p = SinkParser(sink, baseURI=base) - p._bindings[''] = p._baseURI + '#' - p.startDoc() - - f = open(sys.argv[1], 'rb') - bytes = f.read() - f.close() - - p.feed(bytes) - p.endDoc() - for t in g.quads((None,None,None)): - - print t - -if __name__ == '__main__': - main() - -#ends - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py b/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py deleted file mode 100644 index fbb4a37..0000000 --- 
a/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -This is a rdflib plugin for parsing NQuad files into Conjunctive -graphs that can be used and queried. The store that backs the graph -*must* be able to handle contexts. - ->>> from rdflib import ConjunctiveGraph, URIRef, Namespace ->>> g = ConjunctiveGraph() ->>> data = open("test/example.nquads", "rb") ->>> g.parse(data, format="nquads") # doctest:+ELLIPSIS -<Graph identifier=... (<class 'rdflib.graph.Graph'>)> ->>> assert len(g.store) == 449 ->>> # There should be 16 separate contexts ->>> assert len([x for x in g.store.contexts()]) == 16 ->>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009) ->>> # Looking for: ->>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009> ->>> s = URIRef("http://bibliographica.org/entity/E10009") ->>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") ->>> assert(g.value(s, FOAF.name) == "Arco Publications") -""" - -from rdflib.py3compat import b - -# Build up from the NTriples parser: -from rdflib.plugins.parsers.ntriples import NTriplesParser -from rdflib.plugins.parsers.ntriples import ParseError -from rdflib.plugins.parsers.ntriples import r_tail -from rdflib.plugins.parsers.ntriples import r_wspace -from rdflib.plugins.parsers.ntriples import r_wspaces - -__all__ = ['QuadSink', 'NQuadsParser'] - -class QuadSink(object): - def __init__(self): - class FakeStore(object): - def __init__(self, addn): - self.addN = addn - self.length = 0 - self.__quads = [] - self.__store = FakeStore(self.addN) - - def addN(self, quads): - self.length += 1 - self.__quads.append(quads) - - def quads(self, (s,p,o)): - for s,p,o,ctx in self.__quads: - yield s,p,o,ctx - -class NQuadsParser(NTriplesParser): - def __init__(self, sink=None): - if sink is not None: - assert sink.store.context_aware, ("NQuadsParser must be 
given" - " a context aware store.") - self.sink = sink - else: self.sink = QuadSink() - - def parse(self, inputsource, sink, **kwargs): - """Parse f as an N-Triples file.""" - assert sink.store.context_aware, ("NQuadsParser must be given" - " a context aware store.") - self.sink = sink - - source = inputsource.getByteStream() - - if not hasattr(source, 'read'): - raise ParseError("Item to parse must be a file-like object.") - - self.file = source - self.buffer = '' - while True: - self.line = self.readline() - if self.line is None: break - try: self.parseline() - except ParseError: - raise ParseError("Invalid line: %r" % self.line) - return self.sink - - def context(self): - context = self.uriref() - if not context: - raise ParseError("Context must be a uriref") - return context - - def parseline(self): - self.eat(r_wspace) - if (not self.line) or self.line.startswith(b('#')): - return # The line is empty or a comment - - subject = self.subject() - self.eat(r_wspaces) - - predicate = self.predicate() - self.eat(r_wspaces) - - obj = self.object() - self.eat(r_wspaces) - - context = self.context() - self.eat(r_tail) - - if self.line: - raise ParseError("Trailing garbage") - # Must have a context aware store - add on a normal Graph - # discards anything where the ctx != graph.identifier - self.sink.store.add((subject, predicate, obj), context) - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py b/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py deleted file mode 100644 index 1ec2282..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py +++ /dev/null @@ -1,28 +0,0 @@ -from rdflib.parser import Parser -from rdflib.plugins.parsers.ntriples import NTriplesParser - -__all__ = ['NTSink', 'NTParser'] - -class NTSink(object): - def __init__(self, graph): - self.graph = graph - - def triple(self, s, p, o): - self.graph.add((s, p, o)) - - -class NTParser(Parser): - """parser for the ntriples format, often stored with the .nt 
extension - - See http://www.w3.org/TR/rdf-testcases/#ntriples""" - - def __init__(self): - super(NTParser, self).__init__() - - def parse(self, source, sink, baseURI=None): - f = source.getByteStream() # TODO getCharacterStream? - parser = NTriplesParser(NTSink(sink)) - parser.parse(f) - f.close() - - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py b/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py deleted file mode 100644 index 48fe327..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env python -__doc__=""" -N-Triples Parser -License: GPL 2, W3C, BSD, or MIT -Author: Sean B. Palmer, inamidst.com -""" - -import re -from rdflib.term import URIRef as URI -from rdflib.term import BNode as bNode -from rdflib.term import Literal - -from rdflib.py3compat import b, cast_bytes - -__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser'] - -uriref = b(r'<([^:]+:[^\s"<>]+)>') -literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"') -litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?') - -r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)')) -r_wspace = re.compile(b(r'[ \t]*')) -r_wspaces = re.compile(b(r'[ \t]+')) -r_tail = re.compile(b(r'[ \t]*\.[ \t]*')) -r_uriref = re.compile(uriref) -r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)')) -r_literal = re.compile(literal + litinfo) - -bufsiz = 2048 -validate = False - -class Node(unicode): pass - -class ParseError(Exception): pass - -class Sink(object): - def __init__(self): - self.length = 0 - - def triple(self, s, p, o): - self.length += 1 - print (s, p, o) - -quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'} -r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')) -r_quot = re.compile(b(r'\\(t|n|r|"|\\)')) -r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')) - -def unquote(s): - """Unquote an N-Triples string.""" - if not validate: - return 
s.decode('unicode-escape') - else: - result = [] - while s: - m = r_safe.match(s) - if m: - s = s[m.end():] - result.append(m.group(1).decode('ascii')) - continue - - m = r_quot.match(s) - if m: - s = s[2:] - result.append(quot[m.group(1)]) - continue - - m = r_uniquot.match(s) - if m: - s = s[m.end():] - u, U = m.groups() - codepoint = int(u or U, 16) - if codepoint > 0x10FFFF: - raise ParseError("Disallowed codepoint: %08X" % codepoint) - result.append(unichr(codepoint)) - elif s.startswith(b('\\')): - raise ParseError("Illegal escape at: %s..." % s[:10]) - else: raise ParseError("Illegal literal character: %r" % s[0]) - return u''.join(result) - -r_hibyte = re.compile(ur'([\x80-\xFF])') - -def uriquote(uri): - if not validate: - return uri - else: - return r_hibyte.sub( - lambda m: '%%%02X' % ord(m.group(1)), uri) - -class NTriplesParser(object): - """An N-Triples Parser. - - Usage:: - - p = NTriplesParser(sink=MySink()) - sink = p.parse(f) # file; use parsestring for a string - """ - - def __init__(self, sink=None): - if sink is not None: - self.sink = sink - else: self.sink = Sink() - - def parse(self, f): - """Parse f as an N-Triples file.""" - if not hasattr(f, 'read'): - raise ParseError("Item to parse must be a file-like object.") - - self.file = f - self.buffer = '' - while True: - self.line = self.readline() - if self.line is None: break - try: self.parseline() - except ParseError: - raise ParseError("Invalid line: %r" % self.line) - return self.sink - - def parsestring(self, s): - """Parse s as an N-Triples string.""" - if not isinstance(s, basestring): - raise ParseError("Item to parse must be a string instance.") - try: - from io import BytesIO - except ImportError: - from cStringIO import StringIO as BytesIO - f = BytesIO() - f.write(cast_bytes(s)) - f.seek(0) - self.parse(f) - - def readline(self): - """Read an N-Triples line from buffered input.""" - # N-Triples lines end in either CRLF, CR, or LF - # Therefore, we can't just use f.readline() - if 
not self.buffer: - buffer = self.file.read(bufsiz) - if not buffer: return None - self.buffer = buffer - - while True: - m = r_line.match(self.buffer) - if m: # the more likely prospect - self.buffer = self.buffer[m.end():] - return m.group(1) - else: - buffer = self.file.read(bufsiz) - if not buffer and not self.buffer.isspace(): - raise ParseError("EOF in line") - elif not buffer: - return None - self.buffer += buffer - - def parseline(self): - self.eat(r_wspace) - if (not self.line) or self.line.startswith(b('#')): - return # The line is empty or a comment - - subject = self.subject() - self.eat(r_wspaces) - - predicate = self.predicate() - self.eat(r_wspaces) - - object = self.object() - self.eat(r_tail) - - if self.line: - raise ParseError("Trailing garbage") - self.sink.triple(subject, predicate, object) - - def peek(self, token): - return self.line.startswith(token) - - def eat(self, pattern): - m = pattern.match(self.line) - if not m: # @@ Why can't we get the original pattern? - print(dir(pattern)) - print repr(self.line), type(self.line) - raise ParseError("Failed to eat %s" % pattern) - self.line = self.line[m.end():] - return m - - def subject(self): - # @@ Consider using dictionary cases - subj = self.uriref() or self.nodeid() - if not subj: - raise ParseError("Subject must be uriref or nodeID") - return subj - - def predicate(self): - pred = self.uriref() - if not pred: - raise ParseError("Predicate must be uriref") - return pred - - def object(self): - objt = self.uriref() or self.nodeid() or self.literal() - if objt is False: - raise ParseError("Unrecognised object type") - return objt - - def uriref(self): - if self.peek(b('<')): - uri = self.eat(r_uriref).group(1) - uri = unquote(uri) - uri = uriquote(uri) - return URI(uri) - return False - - def nodeid(self): - if self.peek(b('_')): - return bNode(self.eat(r_nodeid).group(1).decode()) - return False - - def literal(self): - if self.peek(b('"')): - lit, lang, dtype = self.eat(r_literal).groups() - 
if lang: - lang = lang.decode() - else: - lang = None - if dtype: - dtype = dtype.decode() - else: - dtype = None - if lang and dtype: - raise ParseError("Can't have both a language and a datatype") - lit = unquote(lit) - return Literal(lit, lang, dtype) - return False - -# # Obsolete, unused -# def parseURI(uri): -# import urllib -# parser = NTriplesParser() -# u = urllib.urlopen(uri) -# sink = parser.parse(u) -# u.close() -# # for triple in sink: -# # print triple -# print 'Length of input:', sink.length - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py deleted file mode 100644 index 9553349..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -From a Python file, expecting an RDF/XML pretty printed output:: - - import rdflib.graph as g - graph = g.Graph() - graph.parse('filename.html', format='rdfa') - print graph.serialize(format='pretty-xml') - -For details on RDFa, the reader should consult the `RDFa syntax document`__. - -This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman - -.. __: http://www.w3.org/TR/rdfa-syntax -.. __: http://www.w3.org/2007/08/pyRdfa/ - -""" - - -import sys -import urllib -import xml.dom.minidom - -from rdflib.term import URIRef -from rdflib.parser import Parser -from rdflib.plugins.parsers.rdfa.state import ExecutionContext -from rdflib.plugins.parsers.rdfa.parse import parse_one_node -from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph, - DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA) - -from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform - -__all__ = ['RDFaParser'] - -# These are part of the RDFa spec. -BUILT_IN_TRANSFORMERS = [ - head_about_transform -] - -# Exception handling. 
Essentially, all the different exceptions are re-packaged -# into separate exception class, to allow for an easier management on the user -# level -class RDFaError(Exception) : - """Just a wrapper around the local exceptions. It does not add any new - functionality to the Exception class.""" - pass - -# For some doctype and element name combinations an automatic switch to an -# input mode is done -_HOST_LANG = { - ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA, - ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML -} - - -class RDFaParser(Parser): - - def parse(self, source, sink, - warnings=False, space_preserve=True, - transformers=None, xhtml=True, lax=True, html5=False, encoding=None): - if transformers is None: - transformers = [] - options = Options(warnings, space_preserve, transformers, xhtml, lax) - baseURI = source.getPublicId() - stream = source.getByteStream() - if html5: - dom = _process_html5_source(stream, options, encoding) - else: - dom = _try_process_source(stream, options, encoding) - _process_DOM(dom, baseURI, sink, options) - - -def _process_DOM(dom, base, graph, options=None): - """ - Core processing. The transformers ("pre-processing") is done on the DOM - tree, the state is initialized, and the "real" RDFa parsing is done. - The result is put into the provided Graph. - - The real work is done in the parser function ``parse_one_node()``. - - Params: - dom -- XML DOM Tree node (for the top level) - base -- URI for the default "base" value (usually the URI of the file to be processed) - - Options: - obj -- `Options` for the distiller - raise RDFaError -- when called via CGI, this encapsulates the possible - exceptions raised by the RDFLib serializer or the processing itself - """ - html = dom.documentElement - # Perform the built-in and external transformations on the HTML tree. This is, - # in simulated form, the hGRDDL approach of Ben Adida. 
- for trans in options.transformers + BUILT_IN_TRANSFORMERS: - trans(html, options) - # Collect the initial state. This takes care of things - # like base, top level namespace settings, etc. - # Ensure the proper initialization. - state = ExecutionContext(html, graph, base=base, options=options) - # The top level subject starts with the current document; this - # is used by the recursion - subject = URIRef(state.base) - # Parse the whole thing recursively and fill the graph. - parse_one_node(html, graph, subject, state, []) - if options.comment_graph.graph != None: - # Add the content of the comment graph to the output. - graph.bind("dist", DIST_NS) - for t in options.comment_graph.graph: - graph.add(t) - -def _try_process_source(stream, options, encoding): - """ - Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options - while figuring out input.. - - Returns a DOM tree. - """ - parse = xml.dom.minidom.parse - try: - dom = parse(stream) - # Try to second-guess the input type - # This is _not_ really kosher, but the minidom is not really namespace aware... - # In practice the goal is to have the system recognize svg content automatically - # First see if there is a default namespace defined for the document: - top = dom.documentElement - if top.hasAttribute("xmlns"): - key = (top.getAttribute("xmlns"), top.nodeName) - if key in _HOST_LANG: - options.host_language = _HOST_LANG[key] - return dom - except: - # XML Parsing error in the input - type, value, traceback = sys.exc_info() - if options.host_language == GENERIC_XML or options.lax == False: - raise RDFaError('Parsing error in input file: "%s"' % value) - - # XML Parsing error in the input - msg = "XHTML Parsing error in input file: %s. 
Falling back on the HTML5 parser" % value - if options != None and options.warnings: - options.comment_graph.add_warning(msg) - - # in Ivan's original code he reopened the stream if it was from urllib - if isinstance(stream, urllib.addinfourl): - stream = urllib.urlopen(stream.url) - - return _process_html5_source(stream, options, encoding) - - -def _process_html5_source(stream, options, encoding): - # Now try to see if and HTML5 parser is an alternative... - try: - from html5lib import HTMLParser, treebuilders - except ImportError: - # no alternative to the XHTML error, because HTML5 parser not available... - msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>' - raise RDFaError(msg2) - - parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom")) - parse = parser.parse - try: - dom = parse(stream, encoding) - # The host language has changed - options.host_language = HTML5_RDFA - except: - # Well, even the HTML5 parser could not do anything with this... - (type, value, traceback) = sys.exc_info() - msg2 = 'Parsing error in input file as HTML5: "%s"' % value - raise RDFaError, msg2 - - return dom diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py deleted file mode 100644 index 4a9b015..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example -by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}. 
- -@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} -@contact: Ivan Herman, ivan@w3.org -""" - -from StringIO import StringIO - -__all__ = ['handle_embeddedRDF'] - -def handle_embeddedRDF(node, graph, state): - """ - Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate - base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing. - @param node: a DOM node for the top level xml element - @param graph: target rdf graph - @type graph: RDFLib's Graph object instance - @param state: the inherited state (namespaces, lang, etc) - @type state: L{State.ExecutionContext} - @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. - @rtype: Boolean - - """ - if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#": - node.setAttribute("xml:base",state.base) - rdf = StringIO(node.toxml()) - graph.parse(rdf) - return True - else: - return False - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py deleted file mode 100644 index 2ab9b44..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py +++ /dev/null @@ -1,180 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Implementation of the Literal handling. Details of the algorithm are described on -U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. 
- -@summary: RDFa Literal generation -@requires: U{RDFLib package<http://rdflib.net>} -@organization: U{World Wide Web Consortium<http://www.w3.org>} -@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} -""" - -import re -from rdflib.namespace import RDF -from rdflib.term import Literal - -__all__ = ['generate_literal'] - -XMLLiteral = RDF.XMLLiteral - - -def __putBackEntities(str): - """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string. - Used by XML Literal - @param str: string to be converted - @return: string with entities - @rtype: string - """ - return str.replace('&', '&').replace('<', '<').replace('>', '>') - -#### The real meat... -def generate_literal(node, graph, subject, state): - """Generate the literal the C{@property}, taking into account datatype, etc. - Note: this method is called only if the C{@property} is indeed present, no need to check. - - This method is an encoding of the algorithm documented - U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}. - - The method returns a value whether the literal is a 'normal' literal (regardless of its datatype) - or an XML Literal. The return value is True or False, respectively. This value is used to control whether - the parser should stop recursion. This also means that that if the literal is generated from @content, - the return value is False, regardless of the possible @datatype value. 
def generate_literal(node, graph, subject, state):
    """Generate the literal for C{@property}, taking into account datatype, etc.
    Note: this method is called only if the C{@property} is indeed present, no need to check.

    This method is an encoding of the algorithm documented on the
    U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.

    The method returns a value whether the literal is a 'normal' literal (regardless of its datatype)
    or an XML Literal. The return value is True or False, respectively. This value is used to control
    whether the parser should stop recursion. This also means that if the literal is generated from
    @content, the return value is True, regardless of the possible @datatype value.

    @param node: DOM element node
    @param graph: the (RDF) graph to add the properties to
    @param subject: the RDFLib URIRef serving as a subject for the generated triples
    @param state: the current state to be used for the CURIE-s
    @type state: L{State.ExecutionContext}
    @return: whether the literal is a 'normal' or an XML Literal (True or False, respectively)
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get (recursively) the full text from a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        rc = ""
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + node.data
            elif node.nodeType == node.ELEMENT_NODE:
                rc = rc + _get_literal(node)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option.
        if state.options.space_preserve:
            return rc
        else:
            return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
    # end _get_literal

    def _get_XML_literal(Pnode):
        """
        Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
        via a C{node.toxml} call of the xml minidom implementation.)

        @param Pnode: DOM Node
        @return: string
        """
        def collectPrefixes(prefixes, node):
            def addPf(prefx, string):
                pf = string.split(':')[0]
                if pf != string and pf not in prefx: prefx.append(pf)
            # end addPf

            # first the local name of the node
            addPf(prefixes, node.tagName)
            # get all the attributes and children
            for child in node.childNodes:
                if child.nodeType == node.ELEMENT_NODE:
                    collectPrefixes(prefixes, child)
                elif child.nodeType == node.ATTRIBUTE_NODE:
                    # Fixed: the original referenced the non-existent
                    # 'node.child.name'; the prefix comes from 'child' itself.
                    # (minidom never puts attribute nodes in childNodes, so
                    # this branch is defensive only.)
                    addPf(prefixes, child.name)
        # end collectPrefixes

        rc = ""
        prefixes = []
        for node in Pnode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                collectPrefixes(prefixes, node)

        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + __putBackEntities(node.data)
            elif node.nodeType == node.ELEMENT_NODE:
                # Decorate the element with namespaces and lang values
                for prefix in prefixes:
                    if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix):
                        node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix])
                # Set the default namespace, if not done (and is available)
                if not node.getAttribute("xmlns") and state.defaultNS != None:
                    node.setAttribute("xmlns", state.defaultNS)
                # Get the lang, if necessary
                if not node.getAttribute("xml:lang") and state.lang != None:
                    node.setAttribute("xml:lang", state.lang)
                rc = rc + node.toxml()
        return rc
        # If XML Literals had to be canonicalized for space, the return line would be:
        # return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
    # end _get_XML_literal

    # Most of the time the literal is a 'normal' one, ie, not an XML Literal
    retval = True

    # Get the Property URI-s
    props = state.get_resources(node.getAttribute("property"), prop=True)

    # Get, if it exists, the value of @datatype, and figure out the language
    datatype = None
    dtset = False
    lang = state.lang
    if node.hasAttribute("datatype"):
        dtset = True
        dt = node.getAttribute("datatype")
        if dt != "":
            datatype = state.get_resource(dt)
            # an explicit (non-empty) datatype suppresses the language tag
            lang = None

    if node.hasAttribute("content"):
        # The simple case: separate @content attribute. The keyword parameters
        # take care of datatype and language.
        obj = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
    elif dtset:
        # There *is* a datatype (even if it is empty!). The Literal content is
        # the pure text part of the current element, unless the specified
        # datatype is, in fact, an explicit XML Literal.
        if datatype == XMLLiteral:
            obj = Literal(_get_XML_literal(node), datatype=XMLLiteral)
            retval = False
        else:
            obj = Literal(_get_literal(node), datatype=datatype, lang=lang)
    elif any(n.nodeType == node.ELEMENT_NODE for n in node.childNodes):
        # No controlling @datatype, but there is markup in the contained
        # element, so an XML Literal is generated.
        obj = Literal(_get_XML_literal(node), datatype=XMLLiteral)
        retval = False
    else:
        # Plain literal. Entities in the string are returned as real characters
        # by the DOM implementation and are stored as such.
        obj = Literal(_get_literal(node), lang=lang)

    # The object may be empty (eg, an ill-defined <meta> element); it is added anyway.
    for prop in props:
        graph.add((subject, prop, obj))

    return retval
def _add_to_comment_graph(graph, msg, prop, uri):
    """
    Add a distiller message to the graph.

    @param graph: RDFLib Graph
    @param msg: message of an exception
    @type msg: RDFLib Literal
    @param prop: the property to be used
    @type prop: string, must be one of 'warning', 'error', 'info', 'debug'
    @param uri: the top URI used to invoke the distiller
    @type uri: URIRef
    """
    # Each message hangs off the distiller URI via a bnode carrying the
    # affected URI and the message text.
    bnode = BNode()
    graph.add((distillerURI, _message_properties[prop], bnode))
    graph.add((bnode, ns_errors["onURI"], uri))
    graph.add((bnode, ns_errors["message"], msg))


class CommentGraph(object):
    """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings,
    error messages, and informational messages.
    """
    def __init__(self, warnings=False):
        """
        @param warnings: whether a graph should effectively be set up, or whether this
        should just be an empty shell for the various calls to work (without effect)
        """
        self.graph = Graph() if warnings else None
        # Messages arriving before set_base_URI is called are buffered here,
        # because the subject URI of the comment triples is not yet known.
        self.accumulated_literals = []
        self.baseURI = None

    def _add_triple(self, msg, prop):
        obj = Literal(msg)
        if self.baseURI is None:
            # base not yet known: buffer until set_base_URI is called
            self.accumulated_literals.append((obj, prop))
        elif self.graph is not None:
            _add_to_comment_graph(self.graph, obj, prop, self.baseURI)

    def set_base_URI(self, URI):
        """Set the base URI for the comment triples.

        Note that this method I{must} be called at some point to complete the triples. Without it the triples
        added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph.

        @param URI: URIRef for the subject of the comments
        """
        self.baseURI = URI
        if self.graph is not None:
            # flush everything that was buffered before the base was known
            for obj, prop in self.accumulated_literals:
                _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
            self.accumulated_literals = []

    def add_warning(self, txt):
        """Add a warning. A comment triplet is added to the separate "warning" graph.
        @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== "
        """
        self._add_triple(txt, WARNING)

    def add_info(self, txt):
        """Add an informational comment. A comment triplet is added to the separate "warning" graph.
        @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
        """
        self._add_triple(txt, INFO)

    def add_error(self, txt):
        """Add an error comment. A comment triplet is added to the separate "warning" graph.
        @param txt: the error text. (The original docstring described this as
        "information" — a copy-paste slip from add_info.)
        """
        self._add_triple(txt, ERROR)

    def _add_debug(self, txt):
        # Internal: debug-level message.
        self._add_triple(txt, DEBUG)


# Host language identifiers (logically an enumeration).
GENERIC_XML = 0
XHTML_RDFA = 1
HTML5_RDFA = 2
class Options(object):
    """Settable options. An instance of this class is stored in
    the L{execution context<ExecutionContext>} of the parser.

    @ivar space_preserve: whether plain literals should preserve spaces at output or not
    @type space_preserve: Boolean
    @ivar comment_graph: Graph for the storage of warnings
    @type comment_graph: L{CommentGraph}
    @ivar warnings: whether warnings should be generated or not
    @type warnings: Boolean
    @ivar transformers: extra transformers
    @type transformers: list
    @ivar host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA
    @type host_language: integer (logically: an enumeration)
    @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
    @type lax: Boolean
    """
    def __init__(self, warnings=False, space_preserve=True, transformers=None, xhtml=True, lax=False):
        """
        @param warnings: whether warnings should be generated or not
        @type warnings: Boolean
        @param space_preserve: whether plain literals should preserve spaces at output or not
        @type space_preserve: Boolean
        @param transformers: extra transformers; C{None} (the default) means an empty, per-instance list.
        @type transformers: list
        @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that at run time the attribute might be set to HTML5_RDFA, depending on the value of the lax flag and the result of parsing.
        @type xhtml: Boolean
        @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
        @type lax: Boolean
        """
        self.space_preserve = space_preserve
        # The original default was a mutable '[]' shared between every call:
        # any instance appending to its transformers would have leaked the
        # entry into all future instances. A None sentinel avoids that while
        # keeping the call signature backward compatible.
        self.transformers = [] if transformers is None else transformers
        self.comment_graph = CommentGraph(warnings)
        self.warnings = warnings
        self.lax = lax
        self.host_language = XHTML_RDFA if xhtml else GENERIC_XML

    def __str__(self):
        retval = """Current options:
        space_preserve : %s
        warnings       : %s
        lax parsing    : %s
        host language  : %s
        """
        return retval % (self.space_preserve, self.warnings, self.lax, self.host_language)
def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc)
    @type incoming_state: L{State.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: whether the caller has to complete its parent's incomplete triples
    @rtype: Boolean
    """
    def _get_resources_for_attr(attr):
        """Get a series of resources encoded via CURIE-s for an attribute on a specific node.
        @param attr: the name of the attribute
        @return: a list of RDFLib URIRef instances
        """
        if not node.hasAttribute(attr):
            return []
        return state.get_resources(node.getAttribute(attr),
                                   attr in ("rel", "rev"),
                                   attr == "property")

    # Refresh the state: this covers, eg, locally declared namespaces and lang.
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    # Special case for embedded RDF, eg, in SVG1.2: when an rdf:RDF subtree is
    # detected its triples go straight into the graph and no RDFa processing
    # happens on that subtree.
    if (state.options.host_language == GENERIC_XML
            and node.nodeType == node.ELEMENT_NODE
            and handle_embeddedRDF(node, graph, state)):
        return

    # Nothing to do at all when no relevant RDFa attribute is present:
    # just descend into the children and return.
    if not _has_one_of_attributes(node, "href", "resource", "about", "property",
                                  "rel", "rev", "typeof", "src"):
        for child in node.childNodes:
            if child.nodeType == node.ELEMENT_NODE:
                parse_one_node(child, graph, parent_object, state, parent_incomplete_triples)
        return

    # Establish the subject and object for local processing; the behaviour
    # differs slightly depending on the presence of @rel/@rev.
    current_subject = None
    current_object = None

    if _has_one_of_attributes(node, "rel", "rev"):
        # @rel/@rev introduce a notion of 'left' and 'right' when establishing
        # the new subject and the object resource.

        # the subject first...
        if node.hasAttribute("about"):
            current_subject = state.get_Curie_ref(node.getAttribute("about"))
        elif node.hasAttribute("src"):
            current_subject = state.get_URI_ref(node.getAttribute("src"))
        elif node.hasAttribute("typeof"):
            current_subject = BNode()
        # the lookups may return None for an illegal CURIE, so a plain
        # 'else' would not be safe here
        if current_subject is None:
            current_subject = parent_object

        # ...then the object resource
        if node.hasAttribute("resource"):
            current_object = state.get_Curie_ref(node.getAttribute("resource"))
        elif node.hasAttribute("href"):
            current_object = state.get_URI_ref(node.getAttribute("href"))
    else:
        # Without @rel/@rev all the 'resource' setting attributes behave
        # identically, apart from their CURIE/URI interpretation and their
        # priority, encoded in the order of this table.
        for attr_name, interpret in (("about", state.get_Curie_ref),
                                     ("src", state.get_URI_ref),
                                     ("resource", state.get_Curie_ref),
                                     ("href", state.get_URI_ref)):
            if node.hasAttribute(attr_name):
                current_subject = interpret(node.getAttribute(attr_name))
                break
        else:
            if node.hasAttribute("typeof"):
                current_subject = BNode()

        # the interpretation may have yielded None for an illegal CURIE
        if current_subject is None:
            current_subject = parent_object

        # no non-literal triples are generated in this branch; the object's
        # only role is to be handed down to the children
        current_object = current_subject

    # @typeof produces rdf:type statements on the new subject.
    for defined_type in _get_resources_for_attr("typeof"):
        graph.add((current_subject, RDF.type, defined_type))

    # @rel/@rev generate triples right away when the object is known; otherwise
    # 'incomplete' triples are collected and forwarded to the children.
    incomplete_triples = []
    for prop in _get_resources_for_attr("rel"):
        if current_object is not None:
            graph.add((current_subject, prop, current_object))
        else:
            incomplete_triples.append((current_subject, prop, current_object))
    for prop in _get_resources_for_attr("rev"):
        if current_object is not None:
            graph.add((current_object, prop, current_subject))
        else:
            incomplete_triples.append((current_object, prop, current_subject))

    # Literal generation for @property. An XML Literal consumes the whole
    # subtree, in which case the recursion below must be suppressed; this is
    # signalled by generate_literal returning False.
    if node.hasAttribute("property"):
        recurse = generate_literal(node, graph, current_subject, state)
    else:
        recurse = True

    # A fresh bnode becomes the candidate resource for the incomplete triples
    # handed down to the children.
    object_to_children = BNode() if current_object is None else current_object

    if recurse:
        for child in node.childNodes:
            if child.nodeType == node.ELEMENT_NODE:
                parse_one_node(child, graph, object_to_children, state, incomplete_triples)

    # Finally, the parent's incomplete triples are completed with the subject
    # established here.
    for s, p, o in parent_incomplete_triples:
        graph.add((current_subject if s is None else s,
                   p,
                   current_subject if o is None else o))
    return


def _has_one_of_attributes(node, *args):
    """
    Check whether at least one of the listed attributes is present on a (DOM) node.
    @param node: DOM element node
    @param args: possible attribute names
    @return: True or False
    @rtype: Boolean
    """
    return any(node.hasAttribute(attr) for attr in args)
The state includes: - - - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances - - language, retrieved from C{@xml:lang} - - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed. - - options, in the form of an L{Options<pyRdfa.Options>} instance - -The execution context object is also used to turn relative URI-s and CURIES into real URI references. - -@summary: RDFa core parser processing step -@requires: U{RDFLib package<http://rdflib.net>} -@organization: U{World Wide Web Consortium<http://www.w3.org>} -@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} - -@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace -@var XHTML_URI: URI prefix of the XHTML vocabulary -@var RDFa_PROFILE: the official RDFa profile URI -@var RDFa_VERSION: the official version string of RDFa -@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected) -@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s. -@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.) -@var __bnodes: dictionary of blank node names to real blank node -@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}". 
-""" - -from rdflib.namespace import Namespace, RDF, RDFS -from rdflib.term import BNode, URIRef -from rdflib.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA - -import re -import random -import urlparse - -__all__ = ['ExecutionContext'] - -RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab" -RDFa_VERSION = "XHTML+RDFa 1.0" -RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN" -RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd" - -usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"] - -####Predefined @rel/@rev/@property values -# predefined values for the @rel and @rev values. These are considered to be part of a specific -# namespace, defined by the RDFa document. -# At the moment, there are no predefined @property values, but the code is there in case -# some will be defined -XHTML_PREFIX = "xhv" -XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#" - -_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents', -'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev', -'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top'] - -_predefined_property = [] - -#### Managing blank nodes for CURIE-s -__bnodes = {} -__empty_bnode = BNode() -def _get_bnode_from_Curie(var): - """ - 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used - before, then the corresponding BNode is returned; otherwise a new BNode is created and - associated to that value. - @param var: CURIE BNode identifier - @return: BNode - """ - if len(var) == 0: - return __empty_bnode - if var in __bnodes: - return __bnodes[var] - else: - retval = BNode() - __bnodes[var] = retval - return retval - -#### Quote URI-s -import urllib -# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other -# special characters are converted to their %.. 
#### Quote URI-s
# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other
# special characters are converted to their %.. equivalents for namespace prefixes
_unquotedChars = ':/\\?=#'
_warnChars = [' ', '\n', '\r', '\t']

# Compatibility shim: urllib.quote moved to urllib.parse.quote in Python 3.
try:
    from urllib import quote as _url_quote        # Python 2
except ImportError:
    from urllib.parse import quote as _url_quote  # Python 3

def _quote(uri, options):
    """
    'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
    may stay as they are (listed in L{_unquotedChars}). If one of the characters listed in L{_warnChars}
    is also in the uri, an extra warning is also generated.
    @param uri: URI
    @param options: invocation options; may be None, in which case no warning is generated
    @type options: L{Options<pyRdfa.Options>}
    @return: the quoted URI string
    """
    suri = uri.strip()
    # Whitespace inside a URI is almost always a typo: warn once.
    for c in _warnChars:
        if suri.find(c) != -1:
            if options is not None:
                options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri)
            break
    return _url_quote(suri, _unquotedChars)


#### Core Class definition
class ExecutionContext(object):
    """State at a specific node, including the current set
    of namespaces in the RDFLib sense, the current language, and the base. The class is also used
    to interpret URI-s and CURIE-s to produce URI references for RDFLib.

    @ivar options: reference to the overall options
    @type options: L{Options.Options}
    @ivar base: the 'base' URI
    @ivar defaultNS: default namespace
    @ivar lang: language tag (possibly None)
    @ivar ns: dictionary of namespaces
    @type ns: dictionary, each value is an RDFLib Namespace object
    """
    def __init__(self, node, graph, inherited_state=None, base="", options=None):
        """
        @param node: the current DOM Node
        @param graph: the RDFLib Graph
        @keyword inherited_state: the state as inherited
        from upper layers. This inherited_state is mixed with the state information
        retrieved from the current node.
        @type inherited_state: L{State.ExecutionContext}
        @keyword base: string denoting the base URI for the specific node. This overrides the possible
        base inherited from the upper layers. The current XHTML+RDFa syntax does not allow the usage
        of C{@xml:base}, but SVG1.2 does, so this is necessary for SVG (and other possible XML
        dialects that accept C{@xml:base})
        @keyword options: invocation option
        @type options: L{Options<pyRdfa.Options>}
        """
        #-----------------------------------------------------------------
        # Settling the base. Strictly speaking it need not be stored on the
        # context (there is only one place to set it, the <base> element of the
        # <head>), but the structure is prepared for a possible move towards
        # accepting xml:base on each element.
        if inherited_state:
            self.base = inherited_state.base
            self.options = inherited_state.options
            # for generic XML versions the xml:base attribute should be handled
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")
        else:
            # this is the branch called from the very top
            self.base = ""
            for bases in node.getElementsByTagName("base"):
                if bases.hasAttribute("href"):
                    self.base = bases.getAttribute("href")
                    continue
            if self.base == "":
                self.base = base

            # just to play safe; this branch should actually not happen...
            if options is None:
                from pyRdfa import Options
                self.options = Options()
            else:
                self.options = options

            # xml:base is not part of XHTML+RDFa, but it is valid for, say, SVG1.2
            if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
                self.base = node.getAttribute("xml:base")

            self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options)))

            # Check the presence of the @profile and/or @version attribute for
            # the RDFa profile; irrelevant for a generic XML host language.
            if self.options.host_language != GENERIC_XML:
                doctype = None
                try:
                    # not 100% sure every minidom implementation has this
                    doctype = node.ownerDocument.doctype
                except AttributeError:
                    pass
                if doctype is None or not(doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID):
                    # next level: check the version
                    html = node.ownerDocument.documentElement
                    if not(html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version")):
                        # see if at least the profile has been set; find the <head> element
                        head = None
                        for index in range(0, html.childNodes.length - 1):
                            if html.childNodes.item(index).nodeName == "head":
                                head = html.childNodes.item(index)
                                break
                        if not(head is not None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split()):
                            if self.options.host_language == HTML5_RDFA:
                                self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...")
                            else:
                                self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.")

        #-----------------------------------------------------------------
        # Stripping the fragment ID from the base URI, as demanded by RFC 3986
        self.base = urlparse.urldefrag(self.base)[0]

        #-----------------------------------------------------------------
        # Settling the language tags: check the lang or xml:lang attribute first.
        # RDFa does not allow @lang, but HTML5 relies on it.
        if options is not None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"):
            self.lang = node.getAttribute("lang")
            if len(self.lang) == 0: self.lang = None
        elif node.hasAttribute("xml:lang"):
            self.lang = node.getAttribute("xml:lang")
            if len(self.lang) == 0: self.lang = None
        elif inherited_state:
            self.lang = inherited_state.lang
        else:
            self.lang = None

        #-----------------------------------------------------------------
        # Handling namespaces: collect the local xmlns declarations first.
        # (renamed from 'dict', which shadowed the builtin)
        local_ns = {}
        for i in range(0, node.attributes.length):
            attr = node.attributes.item(i)
            if attr.name.find('xmlns:') == 0:
                # yep, there is a namespace setting
                key = attr.localName
                if key != "":  # exclude the top level xmlns setting...
                    # Note: the original guarded these two errors with
                    # 'if warning:', but no such name exists in this scope
                    # (a NameError at run time); the comment graph itself is
                    # inert when warnings are disabled, so call it directly.
                    if key == "_":
                        self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed")
                    elif key.find(':') != -1:
                        self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix")
                    else:
                        # quote the URI, ie, convert special characters (eg, spaces) into %..
                        uri = _quote(attr.value, self.options)
                        # create a new Namespace entry and 'bind' it in the
                        # current graph for a nicer output
                        ns = Namespace(uri)
                        graph.bind(key, uri)
                        local_ns[key] = ns

        # If nothing has been collected at all, the namespaces of the incoming
        # state are taken over.
        self.ns = {}
        if len(local_ns) == 0 and inherited_state:
            self.ns = inherited_state.ns
        else:
            if inherited_state:
                for k in inherited_state.ns: self.ns[k] = inherited_state.ns[k]
                # copy the newly found namespaces, possibly overwriting incoming values
                for k in local_ns: self.ns[k] = local_ns[k]
            else:
                self.ns = local_ns

        # see if the xhtml core vocabulary has been set
        self.xhtml_prefix = None
        for key in self.ns.keys():
            if XHTML_URI == str(self.ns[key]):
                self.xhtml_prefix = key
                break
        if self.xhtml_prefix is None:
            if XHTML_PREFIX not in self.ns:
                self.ns[XHTML_PREFIX] = Namespace(XHTML_URI)
                self.xhtml_prefix = XHTML_PREFIX
            else:
                # the most disagreeable thing: the user has used the prefix
                # for something else...
                self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000))
                self.ns[self.xhtml_prefix] = Namespace(XHTML_URI)
                graph.bind(self.xhtml_prefix, XHTML_URI)

        # extra tricks for unusual usages: artificially add the 'rdf' and
        # 'rdfs' prefixes if they are not used
        if "rdf" not in self.ns:
            self.ns["rdf"] = RDF
        if "rdfs" not in self.ns:
            self.ns["rdfs"] = RDFS

        # Final touch: setting the default namespace...
        if node.hasAttribute("xmlns"):
            self.defaultNS = node.getAttribute("xmlns")
        elif inherited_state and inherited_state.defaultNS is not None:
            self.defaultNS = inherited_state.defaultNS
        else:
            self.defaultNS = None
- @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_rel: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val) - return None - - def _get_predefined_properties(self, val, warning): - """Get the predefined value for the C{@property} attribute. - @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_property: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val) - return None - - def get_resource(self, val, rel=False, prop=False, warning=True): - """Get a resource for a CURIE. - The input argument is a CURIE; this is interpreted - via the current namespaces and the corresponding URI Reference is returned - @param val: string of the form "prefix:lname" - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: an RDFLib URIRef instance (or None) - """ - if val == "": - return None - elif val.find(":") != -1: - key = val.split(":", 1)[0] - lname = val.split(":", 1)[1] - if key == "_": - # A possible error: this method is invoked for property URI-s, which - # should not refer to a blank node. 
This case is checked and a possible - # error condition is handled - self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname) - return None - if key == "": - # This is the ":blabla" case - key = self.xhtml_prefix - else: - # if the resources correspond to a @rel or @rev or @property, then there - # may be one more possibility here, namely that it is one of the - # predefined values - if rel: - return self._get_predefined_rels(val, warning) - elif prop: - return self._get_predefined_properties(val, warning) - else: - self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val) - return None - - if key not in self.ns: - self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key) - return None - else: - if lname == "": - return URIRef(str(self.ns[key])) - else: - return self.ns[key][lname] - - def get_resources(self, val, rel=False, prop=False): - """Get a series of resources encoded in CURIE-s. - The input argument is a list of CURIE-s; these are interpreted - via the current namespaces and the corresponding URI References are returned. - @param val: strings of the form prefix':'lname, separated by space - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: a list of RDFLib URIRef instances (possibly empty) - """ - val.strip() - resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ] - return [ r for r in resources if r != None ] - - def get_URI_ref(self, val): - """Create a URI RDFLib resource for a URI. - The input argument is a URI. It is checked whether it is a local - reference with a '#' or not. If yes, a URIRef combined with the - stored base value is returned. 
In both cases a URIRef for a full URI is created - and returned - @param val: URI string - @return: an RDFLib URIRef instance - """ - if val == "": - return URIRef(self.base) - elif val[0] == '[' and val[-1] == ']': - self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val) - return None - else: - return URIRef(urlparse.urljoin(self.base, val)) - - def get_Curie_ref(self, val): - """Create a URI RDFLib resource for a CURIE. - The input argument is a CURIE. This means that it is: - - either of the form [a:b] where a:b should be resolved as an - 'unprotected' CURIE, or - - it is a traditional URI (relative or absolute) - - If the second case the URI value is also compared to 'usual' URI - protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}). - If there is no match, a warning is generated (indeed, a frequent - mistake in authoring RDFa is to forget the '[' and ']' characters to - "protect" CURIE-s.) - - @param val: CURIE string - @return: an RDFLib URIRef instance - """ - if len(val) == 0: - return URIRef(self.base) - elif val[0] == "[": - if val[-1] == "]": - curie = val[1:-1] - # A possible Blank node reference should be separated here: - if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":": - return _get_bnode_from_Curie(curie[2:]) - else: - return self.get_resource(val[1:-1]) - else: - # illegal CURIE... - self.options.comment_graph.add_error("Illegal CURIE: %s" % val) - return None - else: - # check the value, to see if an error may have been made... 
- # Usual protocol values in the URI - v = val.strip().lower() - protocol = urlparse.urlparse(val)[0] - if protocol != "" and protocol not in usual_protocols: - err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val - self.options.comment_graph.add_warning(err) - return self.get_URI_ref(val) - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py +++ /dev/null diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py deleted file mode 100644 index 0cf8f7a..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Simple transfomer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there). -Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax. 
- -@summary: Add a top "about" to <head> and <body> -@requires: U{RDFLib package<http://rdflib.net>} -@organization: U{World Wide Web Consortium<http://www.w3.org>} -@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} -@contact: Ivan Herman, ivan@w3.org -""" - -def head_about_transform(html, options): - """ - @param html: a DOM node for the top level html element - @param options: invocation options - @type options: L{Options<pyRdfa.Options>} - """ - for top in html.getElementsByTagName("head"): - if not top.hasAttribute("about"): - top.setAttribute("about", "") - for top in html.getElementsByTagName("body"): - if not top.hasAttribute("about"): - top.setAttribute("about", "") - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py deleted file mode 100644 index 00e8d6a..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# * Neither the name of Daniel Krech nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -""" -from xml.sax import make_parser -from xml.sax.handler import ErrorHandler -from xml.sax.saxutils import handler, quoteattr, escape -from urlparse import urljoin, urldefrag - -from rdflib.namespace import RDF, is_ncname -from rdflib.term import URIRef -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.exceptions import ParserError, Error -from rdflib.parser import Parser - -__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser'] - -RDFNS = RDF - -# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI -# A mapping from unqualified terms to there qualified version. 
-UNQUALIFIED = {"about" : RDF.about, - "ID" : RDF.ID, - "type" : RDF.type, - "resource": RDF.resource, - "parseType": RDF.parseType} - -# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms -CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype] - -# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms -SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] - -# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms -OLD_TERMS = [ - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"), - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"), - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")] - -NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS -NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about] - -PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS -PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS -PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID] - -XMLNS = "http://www.w3.org/XML/1998/namespace" -BASE = (XMLNS, "base") -LANG = (XMLNS, "lang") - - -class BagID(URIRef): - __slots__ = ['li'] - def __init__(self, val): - super(URIRef, self).__init__(val) - self.li = 0 - - def next_li(self): - self.li += 1 - return RDFNS[self.li] - - -class ElementHandler(object): - __slots__ = ['start', 'char', 'end', 'li', 'id', - 'base', 'subject', 'predicate', 'object', - 'list', 'language', 'datatype', 'declared', 'data'] - def __init__(self): - self.start = None - self.char = None - self.end = None - self.li = 0 - self.id = None - self.base = None - self.subject = None - self.object = None - self.list = None - self.language = None - self.datatype = None - self.declared = None - self.data = None - - def next_li(self): - self.li += 1 - return RDFNS[self.li] - - -class RDFXMLHandler(handler.ContentHandler): - - def __init__(self, store): - self.store = store - self.preserve_bnode_ids = 
False - self.reset() - - def reset(self): - document_element = ElementHandler() - document_element.start = self.document_element_start - document_element.end = lambda name, qname: None - self.stack = [None, document_element,] - self.ids = {} # remember IDs we have already seen - self.bnode = {} - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self.locator = locator - - def startDocument(self): - pass - - def startPrefixMapping(self, prefix, namespace): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[namespace] = prefix - self.store.bind(prefix, URIRef(namespace), override=False) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElementNS(self, name, qname, attrs): - stack = self.stack - stack.append(ElementHandler()) - current = self.current - parent = self.parent - base = attrs.get(BASE, None) - if base is not None: - base, frag = urldefrag(base) - if parent and parent.base: - base = urljoin(parent.base, base) - else: - systemId = self.locator.getPublicId() or self.locator.getSystemId() - if systemId: - base = urljoin(systemId, base) - else: - if parent: - base = parent.base - if base is None: - systemId = self.locator.getPublicId() or self.locator.getSystemId() - if systemId: - base, frag = urldefrag(systemId) - current.base = base - language = attrs.get(LANG, None) - if language is None: - if parent: - language = parent.language - current.language = language - current.start(name, qname, attrs) - - def endElementNS(self, name, qname): - self.current.end(name, qname) - self.stack.pop() - - def characters(self, content): - char = self.current.char - if char: - char(content) - - def ignorableWhitespace(self, content): - pass - - def processingInstruction(self, target, data): - pass - - def add_reified(self, sid, (s, p, o)): - 
self.store.add((sid, RDF.type, RDF.Statement)) - self.store.add((sid, RDF.subject, s)) - self.store.add((sid, RDF.predicate, p)) - self.store.add((sid, RDF.object, o)) - - def error(self, message): - locator = self.locator - info = "%s:%s:%s: " % (locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()) - raise ParserError(info + message) - - def get_current(self): - return self.stack[-2] - # Create a read only property called current so that self.current - # give the current element handler. - current = property(get_current) - - def get_next(self): - return self.stack[-1] - # Create a read only property that gives the element handler to be - # used for the next element. - next = property(get_next) - - def get_parent(self): - return self.stack[-3] - # Create a read only property that gives the current parent - # element handler - parent = property(get_parent) - - def absolutize(self, uri): - result = urljoin(self.current.base, uri, allow_fragments=1) - if uri and uri[-1]=="#" and result[-1]!="#": - result = "%s#" % result - return URIRef(result) - - def convert(self, name, qname, attrs): - if name[0] is None: - name = URIRef(name[1]) - else: - name = URIRef("".join(name)) - atts = {} - for (n, v) in attrs.items(): #attrs._attrs.iteritems(): # - if n[0] is None: - att = URIRef(n[1]) - else: - att = URIRef("".join(n)) - if att.startswith(XMLNS) or att[0:3].lower()=="xml": - pass - elif att in UNQUALIFIED: - #if not RDFNS[att] in atts: - atts[RDFNS[att]] = v - else: - atts[URIRef(att)] = v - return name, atts - - def document_element_start(self, name, qname, attrs): - if name[0] and URIRef("".join(name)) == RDF.RDF: - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - next.start = self.node_element_start - next.end = self.node_element_end - else: - self.node_element_start(name, qname, attrs) - #self.current.end = self.node_element_end - # TODO... 
set end to something that sets start such that - # another element will cause error - - - def node_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) - current = self.current - absolutize = self.absolutize - - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - next.start = self.property_element_start - next.end = self.property_element_end - - if name in NODE_ELEMENT_EXCEPTIONS: - self.error("Invalid node element URI: %s" % name) - - if RDF.ID in atts: - if RDF.about in atts or RDF.nodeID in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - - id = atts[RDF.ID] - if not is_ncname(id): - self.error("rdf:ID value is not a valid NCName: %s" % id) - subject = absolutize("#%s" % id) - if subject in self.ids: - self.error("two elements cannot use the same ID: '%s'" % subject) - self.ids[subject] = 1 # IDs can only appear once within a document - elif RDF.nodeID in atts: - if RDF.ID in atts or RDF.about in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - nodeID = atts[RDF.nodeID] - if not is_ncname(nodeID): - self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) - if self.preserve_bnode_ids is False: - if nodeID in self.bnode: - subject = self.bnode[nodeID] - else: - subject = BNode() - self.bnode[nodeID] = subject - else: - subject = BNode(nodeID) - elif RDF.about in atts: - if RDF.ID in atts or RDF.nodeID in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - subject = absolutize(atts[RDF.about]) - else: - subject = BNode() - - if name!=RDF.Description: # S1 - self.store.add((subject, RDF.type, absolutize(name))) - - language = current.language - for att in atts: - if not att.startswith(str(RDFNS)): - predicate = absolutize(att) - try: - object = Literal(atts[att], language) - except Error, e: - self.error(e.msg) - elif att==RDF.type: #S2 - predicate = RDF.type - object = 
absolutize(atts[RDF.type]) - elif att in NODE_ELEMENT_ATTRIBUTES: - continue - elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3 - self.error("Invalid property attribute URI: %s" % att) - continue # for when error does not throw an exception - else: - predicate = absolutize(att) - try: - object = Literal(atts[att], language) - except Error, e: - self.error(e.msg) - self.store.add((subject, predicate, object)) - - current.subject = subject - - - def node_element_end(self, name, qname): - self.parent.object = self.current.subject - - def property_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) - current = self.current - absolutize = self.absolutize - - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - object = None - current.data = None - current.list = None - - if not name.startswith(str(RDFNS)): - current.predicate = absolutize(name) - elif name==RDF.li: - current.predicate = current.next_li() - elif name in PROPERTY_ELEMENT_EXCEPTIONS: - self.error("Invalid property element URI: %s" % name) - else: - current.predicate = absolutize(name) - - id = atts.get(RDF.ID, None) - if id is not None: - if not is_ncname(id): - self.error("rdf:ID value is not a value NCName: %s" % id) - current.id = absolutize("#%s" % id) - else: - current.id = None - - resource = atts.get(RDF.resource, None) - nodeID = atts.get(RDF.nodeID, None) - parse_type = atts.get(RDF.parseType, None) - if resource is not None and nodeID is not None: - self.error("Property element cannot have both rdf:nodeID and rdf:resource") - if resource is not None: - object = absolutize(resource) - next.start = self.node_element_start - next.end = self.node_element_end - elif nodeID is not None: - if not is_ncname(nodeID): - self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) - if self.preserve_bnode_ids is False: - if nodeID in self.bnode: - object = self.bnode[nodeID] - else: - subject = BNode() - self.bnode[nodeID] = subject - 
object = subject - else: - object = subject = BNode(nodeID) - next.start = self.node_element_start - next.end = self.node_element_end - else: - if parse_type is not None: - for att in atts: - if att!=RDF.parseType and att!=RDF.ID: - self.error("Property attr '%s' now allowed here" % att) - if parse_type=="Resource": - current.subject = object = BNode() - current.char = self.property_element_char - next.start = self.property_element_start - next.end = self.property_element_end - elif parse_type=="Collection": - current.char = None - object = current.list = RDF.nil #BNode()#self.parent.subject - next.start = self.node_element_start - next.end = self.list_node_element_end - else: #if parse_type=="Literal": - # All other values are treated as Literal - # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt - object = Literal("", datatype=RDF.XMLLiteral) - current.char = self.literal_element_char - current.declared = {} - next.start = self.literal_element_start - next.char = self.literal_element_char - next.end = self.literal_element_end - current.object = object - return - else: - object = None - current.char = self.property_element_char - next.start = self.node_element_start - next.end = self.node_element_end - - datatype = current.datatype = atts.get(RDF.datatype, None) - language = current.language - if datatype is not None: - # TODO: check that there are no atts other than datatype and id - datatype = absolutize(datatype) - else: - for att in atts: - if not att.startswith(str(RDFNS)): - predicate = absolutize(att) - elif att in PROPERTY_ELEMENT_ATTRIBUTES: - continue - elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: - self.error("""Invalid property attribute URI: %s""" % att) - else: - predicate = absolutize(att) - - if att==RDF.type: - o = URIRef(atts[att]) - else: - if datatype is not None: - language = None - o = Literal(atts[att], language, datatype) - - if object is None: - object = BNode() - self.store.add((object, predicate, o)) - if object is 
None: - current.data = "" - current.object = None - else: - current.data = None - current.object = object - - def property_element_char(self, data): - current = self.current - if current.data is not None: - current.data += data - - def property_element_end(self, name, qname): - current = self.current - if current.data is not None and current.object is None: - literalLang = current.language - if current.datatype is not None: - literalLang = None - current.object = Literal(current.data, literalLang, current.datatype) - current.data = None - if self.next.end==self.list_node_element_end: - if current.object!=RDF.nil: - self.store.add((current.list, RDF.rest, RDF.nil)) - if current.object is not None: - self.store.add((self.parent.subject, current.predicate, current.object)) - if current.id is not None: - self.add_reified(current.id, (self.parent.subject, - current.predicate, current.object)) - current.subject = None - - def list_node_element_end(self, name, qname): - current = self.current - if self.parent.list==RDF.nil: - list = BNode() - # Removed between 20030123 and 20030905 - #self.store.add((list, RDF.type, LIST)) - self.parent.list = list - self.store.add((self.parent.list, RDF.first, current.subject)) - self.parent.object = list - self.parent.char = None - else: - list = BNode() - # Removed between 20030123 and 20030905 - #self.store.add((list, RDF.type, LIST)) - self.store.add((self.parent.list, RDF.rest, list)) - self.store.add((list, RDF.first, current.subject)) - self.parent.list = list - - def literal_element_start(self, name, qname, attrs): - current = self.current - self.next.start = self.literal_element_start - self.next.char = self.literal_element_char - self.next.end = self.literal_element_end - current.declared = self.parent.declared.copy() - if name[0]: - prefix = self._current_context[name[0]] - if prefix: - current.object = "<%s:%s" % (prefix, name[1]) - else: - current.object = "<%s" % name[1] - if not name[0] in current.declared: - 
current.declared[name[0]] = prefix - if prefix: - current.object += (' xmlns:%s="%s"' % (prefix, name[0])) - else: - current.object += (' xmlns="%s"' % name[0]) - else: - current.object = "<%s" % name[1] - - for (name, value) in attrs.items(): - if name[0]: - if not name[0] in current.declared: - current.declared[name[0]] = self._current_context[name[0]] - name = current.declared[name[0]] + ":" + name[1] - else: - name = name[1] - current.object += (' %s=%s' % (name, quoteattr(value))) - current.object += ">" - - def literal_element_char(self, data): - self.current.object += escape(data) - - def literal_element_end(self, name, qname): - if name[0]: - prefix = self._current_context[name[0]] - if prefix: - end = u"</%s:%s>" % (prefix, name[1]) - else: - end = u"</%s>" % name[1] - else: - end = u"</%s>" % name[1] - self.parent.object += self.current.object + end - - -def create_parser(target, store): - parser = make_parser() - try: - # Workaround for bug in expatreader.py. Needed when - # expatreader is trying to guess a prefix. 
- parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") - except AttributeError: - pass # Not present in Jython (at least) - parser.setFeature(handler.feature_namespaces, 1) - rdfxml = RDFXMLHandler(store) - rdfxml.setDocumentLocator(target) - #rdfxml.setDocumentLocator(_Locator(self.url, self.parser)) - parser.setContentHandler(rdfxml) - parser.setErrorHandler(ErrorHandler()) - return parser - - -class RDFXMLParser(Parser): - - def __init__(self): - pass - - def parse(self, source, sink, **args): - self._parser = create_parser(source, sink) - content_handler = self._parser.getContentHandler() - preserve_bnode_ids = args.get("preserve_bnode_ids", None) - if preserve_bnode_ids is not None: - content_handler.preserve_bnode_ids = preserve_bnode_ids - # We're only using it once now - #content_handler.reset() - #self._parser.reset() - self._parser.parse(source) - - - diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py b/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py deleted file mode 100644 index 0c2e708..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# * Neither the name of Daniel Krech nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -""" -from rdflib.namespace import Namespace -from rdflib.term import URIRef -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.graph import Graph, ConjunctiveGraph -from rdflib.exceptions import ParserError -from rdflib.parser import Parser - -from xml.sax.saxutils import handler -from xml.sax import make_parser -from xml.sax.handler import ErrorHandler - -__all__ = ['create_parser', 'TriXHandler', 'TriXParser'] - - -TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") -XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") - -class TriXHandler(handler.ContentHandler): - """An Sax Handler for TriX. 
See http://sw.nokia.com/trix/""" - - def __init__(self, store): - self.store = store - self.preserve_bnode_ids = False - self.reset() - - def reset(self): - self.bnode = {} - self.graph=self.store - self.triple=None - self.state=0 - self.lang=None - self.datatype=None - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self.locator = locator - - def startDocument(self): - pass - - def startPrefixMapping(self, prefix, namespace): - pass - - def endPrefixMapping(self, prefix): - pass - - def startElementNS(self, name, qname, attrs): - - if name[0]!=str(TRIXNS): - self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS)) - - if name[1]=="TriX": - if self.state==0: - self.state=1 - else: - self.error("Unexpected TriX element") - - elif name[1]=="graph": - if self.state==1: - self.state=2 - else: - self.error("Unexpected graph element") - - elif name[1]=="uri": - if self.state==2: - # the context uri - self.state=3 - elif self.state==4: - # part of a triple - pass - else: - self.error("Unexpected uri element") - - elif name[1]=="triple": - if self.state==2: - if self.graph==None: - # anonymous graph, create one with random bnode id - self.graph=Graph(store=self.store.store) - # start of a triple - self.triple=[] - self.state=4 - else: - self.error("Unexpected triple element") - - elif name[1]=="typedLiteral": - if self.state==4: - # part of triple - self.lang=None - self.datatype=None - - try: - self.lang=attrs.getValue((unicode(XMLNS), u"lang")) - except: - # language not required - ignore - pass - try: - self.datatype=attrs.getValueByQName(u"datatype") - except KeyError: - self.error("No required attribute 'datatype'") - else: - self.error("Unexpected typedLiteral element") - - elif name[1]=="plainLiteral": - if self.state==4: - # part of triple - self.lang=None - self.datatype=None - try: - self.lang=attrs.getValue((unicode(XMLNS), u"lang")) - except: - # language not required - ignore - pass - - else: - 
self.error("Unexpected plainLiteral element") - - elif name[1]=="id": - if self.state==2: - # the context uri - self.state=3 - - elif self.state==4: - # part of triple - pass - else: - self.error("Unexpected id element") - - else: - self.error("Unknown element %s in TriX namespace"%name[1]) - - self.chars="" - - - def endElementNS(self, name, qname): - if name[0]!=str(TRIXNS): - self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS)) - - if name[1]=="uri": - if self.state==3: - self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip())) - self.state=2 - elif self.state==4: - self.triple+=[URIRef(self.chars.strip())] - else: - self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="id": - if self.state==3: - self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip())) - self.state=2 - elif self.state==4: - self.triple+=[self.get_bnode(self.chars.strip())] - else: - self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="plainLiteral" or name[1]=="typedLiteral": - if self.state==4: - self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)] - else: - self.error("This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="triple": - if self.state==4: - if len(self.triple)!=3: - self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple)) - - self.graph.add(self.triple) - #self.store.store.add(self.triple,context=self.graph) - #self.store.addN([self.triple+[self.graph]]) - self.state=2 - else: - self.error("This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="graph": - self.graph=None - self.state=1 - - elif name[1]=="TriX": - self.state=0 - - else: - self.error("Unexpected close element") - - - def 
get_bnode(self,label): - if self.preserve_bnode_ids: - bn=BNode(label) - else: - if label in self.bnode: - bn=self.bnode[label] - else: - bn=BNode(label) - self.bnode[label]=bn - return bn - - - def characters(self, content): - self.chars+=content - - - def ignorableWhitespace(self, content): - pass - - def processingInstruction(self, target, data): - pass - - - def error(self, message): - locator = self.locator - info = "%s:%s:%s: " % (locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()) - raise ParserError(info + message) - - -def create_parser(store): - parser = make_parser() - try: - # Workaround for bug in expatreader.py. Needed when - # expatreader is trying to guess a prefix. - parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") - except AttributeError: - pass # Not present in Jython (at least) - parser.setFeature(handler.feature_namespaces, 1) - trix = TriXHandler(store) - parser.setContentHandler(trix) - parser.setErrorHandler(ErrorHandler()) - return parser - - -class TriXParser(Parser): - """A parser for TriX. 
See http://sw.nokia.com/trix/""" - - def __init__(self): - pass - - def parse(self, source, sink, **args): - assert sink.store.context_aware - g=ConjunctiveGraph(store=sink.store) - - self._parser = create_parser(g) - content_handler = self._parser.getContentHandler() - preserve_bnode_ids = args.get("preserve_bnode_ids", None) - if preserve_bnode_ids is not None: - content_handler.preserve_bnode_ids = preserve_bnode_ids - # We're only using it once now - #content_handler.reset() - #self._parser.reset() - self._parser.parse(source) - - - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py b/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py +++ /dev/null diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py b/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py deleted file mode 100644 index 63faf9d..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Notation 3 (N3) RDF graph serializer for RDFLib. 
-""" -from rdflib.graph import Graph -from rdflib.namespace import Namespace, OWL -from rdflib.plugins.serializers.turtle import (TurtleSerializer, - SUBJECT, VERB, OBJECT) - -__all__ = ['N3Serializer'] - -SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#") - - -class N3Serializer(TurtleSerializer): - - short_name = "n3" - - def __init__(self, store, parent=None): - super(N3Serializer, self).__init__(store) - self.keywords.update({ - OWL.sameAs: '=', - SWAP_LOG.implies: '=>' - }) - self.parent = parent - - def reset(self): - super(N3Serializer, self).reset() - self._stores = {} - - def subjectDone(self, subject): - super(N3Serializer, self).subjectDone(subject) - if self.parent: - self.parent.subjectDone(subject) - - def isDone(self, subject): - return (super(N3Serializer, self).isDone(subject) - and (not self.parent or self.parent.isDone(subject))) - - def startDocument(self): - super(N3Serializer, self).startDocument() - #if not isinstance(self.store, N3Store): - # return - # - #all_list = [self.label(var) for var in - # self.store.get_universals(recurse=False)] - #all_list.sort() - #some_list = [self.label(var) for var in - # self.store.get_existentials(recurse=False)] - #some_list.sort() - # - #for var in all_list: - # self.write('\n'+self.indent()+'@forAll %s. '%var) - #for var in some_list: - # self.write('\n'+self.indent()+'@forSome %s. 
'%var) - # - #if (len(all_list) + len(some_list)) > 0: - # self.write('\n') - - def endDocument(self): - if not self.parent: - super(N3Serializer, self).endDocument() - - def indent(self, modifier=0): - indent = super(N3Serializer, self).indent(modifier) - if self.parent is not None: - indent += self.parent.indent()#modifier) - return indent - - def preprocessTriple(self, triple): - super(N3Serializer, self).preprocessTriple(triple) - if isinstance(triple[0], Graph): - for t in triple[0]: - self.preprocessTriple(t) - if isinstance(triple[2], Graph): - for t in triple[2]: - self.preprocessTriple(t) - - def getQName(self, uri, gen_prefix=True): - qname = None - if self.parent is not None: - qname = self.parent.getQName(uri, gen_prefix) - if qname is None: - qname = super(N3Serializer, self).getQName(uri, gen_prefix) - return qname - - def statement(self, subject): - self.subjectDone(subject) - properties = self.buildPredicateHash(subject) - if len(properties) == 0: - return False - return (self.s_clause(subject) - or super(N3Serializer, self).statement(subject)) - - def path(self, node, position, newline=False): - if not self.p_clause(node, position): - super(N3Serializer, self).path(node, position, newline) - - def s_clause(self, subject): - if isinstance(subject, Graph): - self.write('\n'+self.indent()) - self.p_clause(subject, SUBJECT) - self.predicateList(subject) - self.write(' .') - return True - else: - return False - - def p_clause(self, node, position): - if isinstance(node, Graph): - self.subjectDone(node) - if position is OBJECT: - self.write(' ') - self.write('{') - self.depth += 1 - serializer = N3Serializer(node, parent=self) - serializer.serialize(self.stream) - self.depth -= 1 - self.write(self.indent()+'}') - return True - else: - return False - - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py b/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py deleted file mode 100644 index 29e0dff..0000000 --- 
a/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py +++ /dev/null @@ -1,34 +0,0 @@ -import warnings - -from rdflib.serializer import Serializer -from rdflib.py3compat import b - -from rdflib.plugins.serializers.nt import _xmlcharref_encode - -__all__ = ['NQuadsSerializer'] - -class NQuadsSerializer(Serializer): - - def __init__(self, store): - if not store.context_aware: - raise Exception("NQuads serialization only makes sense for context-aware stores!") - - super(NQuadsSerializer, self).__init__(store) - - def serialize(self, stream, base=None, encoding=None, **args): - if base is not None: - warnings.warn("NQuadsSerializer does not support base.") - if encoding is not None: - warnings.warn("NQuadsSerializer does not use custom encoding.") - encoding = self.encoding - for context in self.store.contexts(): - for triple in context: - stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace")) - stream.write(b("\n")) - -def _nq_row(triple,context): - return u"%s %s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(triple[2].n3()), - context.n3()) - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py b/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py deleted file mode 100644 index bbbe720..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -N-Triples RDF graph serializer for RDFLib. -See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the -format. -""" -from rdflib.serializer import Serializer -from rdflib.py3compat import b -import warnings - -__all__ = ['NTSerializer'] - -class NTSerializer(Serializer): - """ - Serializes RDF graphs to NTriples format. 
- """ - - def serialize(self, stream, base=None, encoding=None, **args): - if base is not None: - warnings.warn("NTSerializer does not support base.") - if encoding is not None: - warnings.warn("NTSerializer does not use custom encoding.") - encoding = self.encoding - for triple in self.store: - stream.write(_nt_row(triple).encode(encoding, "replace")) - stream.write(b("\n")) - - -def _nt_row(triple): - return u"%s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(triple[2].n3())) - -# from <http://code.activestate.com/recipes/303668/> -def _xmlcharref_encode(unicode_data, encoding="ascii"): - """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler.""" - chars = [] - - # nothing to do about xmlchars, but replace newlines with escapes: - unicode_data=unicode_data.replace("\n","\\n") - if unicode_data.startswith('"""'): - # Updated with Bernhard Schandl's patch... - # unicode_data = unicode_data.replace('"""', '"') # original - - last_triplequote_pos = unicode_data.rfind('"""') - payload = unicode_data[3:last_triplequote_pos] - trail = unicode_data[last_triplequote_pos+3:] - - # fix three-quotes encoding - payload = payload.replace('\\"""', '"""') - - # corner case: if string ends with " it is already encoded. - # so we need to de-escape it before it will be re-escaped in the next step. 
- if payload.endswith('\\"'): - payload = payload.replace('\\"', '"') - - # escape quotes in payload - payload = payload.replace('"', '\\"') - - # reconstruct result using single quotes - unicode_data = '"%s"%s' % (payload, trail) - - # Step through the unicode_data string one character at a time in - # order to catch unencodable characters: - for char in unicode_data: - try: - char.encode(encoding, 'strict') - except UnicodeError: - if ord(char) <= 0xFFFF: - chars.append('\\u%04X' % ord(char)) - else: - chars.append('\\U%08X' % ord(char)) - else: - chars.append(char) - - return ''.join(chars) - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py deleted file mode 100644 index d72c27e..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py +++ /dev/null @@ -1,282 +0,0 @@ -from __future__ import generators - -from rdflib.plugins.serializers.xmlwriter import XMLWriter - -from rdflib.namespace import Namespace, RDF, RDFS, split_uri - -from rdflib.term import URIRef, Literal, BNode -from rdflib.util import first, more_than -from rdflib.collection import Collection -from rdflib.serializer import Serializer - -from rdflib.exceptions import Error - -from rdflib.py3compat import b - -from xml.sax.saxutils import quoteattr, escape - -__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer'] - -class XMLSerializer(Serializer): - - def __init__(self, store): - super(XMLSerializer, self).__init__(store) - - def __bindings(self): - store = self.store - nm = store.namespace_manager - bindings = {} - for predicate in set(store.predicates()): - prefix, namespace, name = nm.compute_qname(predicate) - bindings[prefix] = URIRef(namespace) - RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#") - if "rdf" in bindings: - assert bindings["rdf"]==RDFNS - else: - bindings["rdf"] = RDFNS - for prefix, namespace in bindings.iteritems(): - yield prefix, namespace - - - 
def serialize(self, stream, base=None, encoding=None, **args): - self.base = base - self.__stream = stream - self.__serialized = {} - encoding = self.encoding - self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace')) - - # startDocument - write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding) - - # startRDF - write('<rdf:RDF\n') - # If provided, write xml:base attribute for the RDF - if "xml_base" in args: - write(' xml:base="%s"\n' % args['xml_base']) - # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf') - bindings = list(self.__bindings()) - bindings.sort() - for prefix, namespace in bindings: - if prefix: - write(' xmlns:%s="%s"\n' % (prefix, namespace)) - else: - write(' xmlns="%s"\n' % namespace) - write('>\n') - - # write out triples by subject - for subject in self.store.subjects(): - self.subject(subject, 1) - - # endRDF - write( "</rdf:RDF>\n" ) - - # Set to None so that the memory can get garbage collected. - #self.__serialized = None - del self.__serialized - - - def subject(self, subject, depth=1): - if not subject in self.__serialized: - self.__serialized[subject] = 1 - if isinstance(subject, (BNode,URIRef)): - write = self.write - indent = " " * depth - element_name = "rdf:Description" - if isinstance(subject, BNode): - write( '%s<%s rdf:nodeID="%s"' % - (indent, element_name, subject)) - else: - uri = quoteattr(self.relativize(subject)) - write( "%s<%s rdf:about=%s" % (indent, element_name, uri)) - if (subject, None, None) in self.store: - write( ">\n" ) - for predicate, object in self.store.predicate_objects(subject): - self.predicate(predicate, object, depth+1) - write( "%s</%s>\n" % (indent, element_name)) - else: - write( "/>\n" ) - - def predicate(self, predicate, object, depth=1): - write = self.write - indent = " " * depth - qname = self.store.namespace_manager.qname(predicate) - if isinstance(object, Literal): - attributes = "" - if object.language: - attributes += ' 
xml:lang="%s"'%object.language - - if object.datatype: - attributes += ' rdf:datatype="%s"'%object.datatype - - write("%s<%s%s>%s</%s>\n" % - (indent, qname, attributes, - escape(object), qname) ) - else: - if isinstance(object, BNode): - write('%s<%s rdf:nodeID="%s"/>\n' % - (indent, qname, object)) - else: - write("%s<%s rdf:resource=%s/>\n" % - (indent, qname, quoteattr(self.relativize(object)))) - - - -XMLLANG = "http://www.w3.org/XML/1998/namespacelang" -XMLBASE = "http://www.w3.org/XML/1998/namespacebase" -OWL_NS = Namespace('http://www.w3.org/2002/07/owl#') - -# TODO: -def fix(val): - "strip off _: from nodeIDs... as they are not valid NCNames" - if val.startswith("_:"): - return val[2:] - else: - return val - - -class PrettyXMLSerializer(Serializer): - - def __init__(self, store, max_depth=3): - super(PrettyXMLSerializer, self).__init__(store) - self.forceRDFAbout=set() - - def serialize(self, stream, base=None, encoding=None, **args): - self.__serialized = {} - store = self.store - self.base = base - self.max_depth = args.get("max_depth", 3) - assert self.max_depth>0, "max_depth must be greater than 0" - - self.nm = nm = store.namespace_manager - self.writer = writer = XMLWriter(stream, nm, encoding) - - namespaces = {} - possible = set(store.predicates()).union(store.objects(None, RDF.type)) - for predicate in possible: - prefix, namespace, local = nm.compute_qname(predicate) - namespaces[prefix] = namespace - namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" - writer.push(RDF.RDF) - if "xml_base" in args: - writer.attribute(XMLBASE, args["xml_base"]) - writer.namespaces(namespaces.iteritems()) - - # Write out subjects that can not be inline - for subject in store.subjects(): - if (None, None, subject) in store: - if (subject, None, subject) in store: - self.subject(subject, 1) - else: - self.subject(subject, 1) - - # write out anything that has not yet been reached - # write out BNodes last (to ensure they can be inlined where possible) - 
bnodes=set() - for subject in store.subjects(): - if isinstance(subject,BNode): - bnodes.add(subject) - continue - self.subject(subject, 1) - #now serialize only those BNodes that have not been serialized yet - for bnode in bnodes: - if bnode not in self.__serialized: - self.subject(subject, 1) - writer.pop(RDF.RDF) - stream.write(b("\n")) - - # Set to None so that the memory can get garbage collected. - self.__serialized = None - - - - def subject(self, subject, depth=1): - store = self.store - writer = self.writer - if subject in self.forceRDFAbout: - writer.push(RDF.Description) - writer.attribute(RDF.about, self.relativize(subject)) - writer.pop(RDF.Description) - self.forceRDFAbout.remove(subject) - elif not subject in self.__serialized: - self.__serialized[subject] = 1 - type = first(store.objects(subject, RDF.type)) - try: - self.nm.qname(type) - except: - type = None - element = type or RDF.Description - writer.push(element) - if isinstance(subject, BNode): - def subj_as_obj_more_than(ceil): - return True - # more_than(store.triples((None, None, subject)), ceil) - - #here we only include BNode labels if they are referenced - #more than once (this reduces the use of redundant BNode identifiers) - if subj_as_obj_more_than(1): - writer.attribute(RDF.nodeID, fix(subject)) - else: - writer.attribute(RDF.about, self.relativize(subject)) - if (subject, None, None) in store: - for predicate, object in store.predicate_objects(subject): - if not (predicate==RDF.type and object==type): - self.predicate(predicate, object, depth+1) - writer.pop(element) - elif subject in self.forceRDFAbout: - writer.push(RDF.Description) - writer.attribute(RDF.about, self.relativize(subject)) - writer.pop(RDF.Description) - self.forceRDFAbout.remove(subject) - - def predicate(self, predicate, object, depth=1): - writer = self.writer - store = self.store - writer.push(predicate) - if isinstance(object, Literal): - attributes = "" - if object.language: - writer.attribute(XMLLANG, 
object.language) - if object.datatype: - writer.attribute(RDF.datatype, object.datatype) - writer.text(object) - elif object in self.__serialized or not (object, None, None) in store: - if isinstance(object, BNode): - if more_than(store.triples((None, None, object)), 0): - writer.attribute(RDF.nodeID, fix(object)) - else: - writer.attribute(RDF.resource, self.relativize(object)) - else: - if first(store.objects(object, RDF.first)): # may not have type RDF.List - collection = object - self.__serialized[object] = 1 - # TODO: warn that any assertions on object other than - # RDF.first and RDF.rest are ignored... including RDF.List - writer.attribute(RDF.parseType, "Collection") - col=Collection(store,object) - for item in col: - if isinstance(item,URIRef): - self.forceRDFAbout.add(item) - self.subject(item) - if not isinstance(item,URIRef): - self.__serialized[item] = 1 - else: - if first(store.triples_choices((object, - RDF.type, - [OWL_NS.Class,RDFS.Class]))) and\ - isinstance(object, URIRef): - writer.attribute(RDF.resource, self.relativize(object)) - elif depth<=self.max_depth: - self.subject(object, depth+1) - elif isinstance(object, BNode): - if not object in self.__serialized and \ - (object, None, None) in store and \ - len(list(store.subjects(object=object)))==1: - #inline blank nodes if they haven't been serialized yet and are - #only referenced once (regardless of depth) - self.subject(object, depth+1) - else: - writer.attribute(RDF.nodeID, fix(object)) - else: - writer.attribute(RDF.resource, self.relativize(object)) - writer.pop(predicate) - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py b/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py deleted file mode 100644 index c7115c0..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py +++ /dev/null @@ -1,72 +0,0 @@ -from rdflib.serializer import Serializer -from rdflib.plugins.serializers.xmlwriter import XMLWriter - -from rdflib.term import 
URIRef, Literal, BNode -from rdflib.namespace import Namespace - -from rdflib.graph import Graph, ConjunctiveGraph - -from rdflib.py3compat import b - -__all__ = ['TriXSerializer'] - -## TODO: MOve this somewhere central -TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") -XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") - -class TriXSerializer(Serializer): - def __init__(self, store): - super(TriXSerializer, self).__init__(store) - - def serialize(self, stream, base=None, encoding=None, **args): - - nm=self.store.namespace_manager - - self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS}) - - self.writer.push(TRIXNS[u"TriX"]) - self.writer.namespaces() - - if isinstance(self.store, ConjunctiveGraph): - for subgraph in self.store.contexts(): - self._writeGraph(subgraph) - elif isinstance(self.store, Graph): - self._writeGraph(self.store) - else: - raise Exception("Unknown graph type: "+type(self.store)) - - self.writer.pop() - stream.write(b("\n")) - - - def _writeGraph(self, graph): - self.writer.push(TRIXNS[u"graph"]) - if isinstance(graph.identifier, URIRef): - self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier)) - - for triple in graph.triples((None,None,None)): - self._writeTriple(triple) - self.writer.pop() - - def _writeTriple(self, triple): - self.writer.push(TRIXNS[u"triple"]) - for component in triple: - if isinstance(component, URIRef): - self.writer.element(TRIXNS[u"uri"], - content=unicode(component)) - elif isinstance(component, BNode): - self.writer.element(TRIXNS[u"id"], - content=unicode(component)) - elif isinstance(component, Literal): - if component.datatype: - self.writer.element(TRIXNS[u"typedLiteral"], - content=unicode(component), - attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) }) - elif component.language: - self.writer.element(TRIXNS[u"plainLiteral"], - content=unicode(component), - attributes={ XMLNS[u"lang"]: unicode(component.language) }) - else: - 
self.writer.element(TRIXNS[u"plainLiteral"], - content=unicode(component)) - self.writer.pop() diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py b/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py deleted file mode 100644 index 6878013..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py +++ /dev/null @@ -1,364 +0,0 @@ -""" -Turtle RDF graph serializer for RDFLib. -See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification. -""" -from rdflib.term import BNode, Literal, URIRef - -from rdflib.exceptions import Error - -from rdflib.serializer import Serializer - -from rdflib.namespace import RDF, RDFS - -__all__ = ['RecursiveSerializer', 'TurtleSerializer'] - -class RecursiveSerializer(Serializer): - - topClasses = [RDFS.Class] - predicateOrder = [RDF.type, RDFS.label] - maxDepth = 10 - indentString = u" " - - def __init__(self, store): - - super(RecursiveSerializer, self).__init__(store) - self.stream = None - self.reset() - - def addNamespace(self, prefix, uri): - self.namespaces[prefix] = uri - - def checkSubject(self, subject): - """Check to see if the subject should be serialized yet""" - if ((self.isDone(subject)) - or (subject not in self._subjects) - or ((subject in self._topLevels) and (self.depth > 1)) - or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth)) - ): - return False - return True - - def isDone(self, subject): - """Return true if subject is serialized""" - return subject in self._serialized - - def orderSubjects(self): - seen = {} - subjects = [] - - for classURI in self.topClasses: - members = list(self.store.subjects(RDF.type, classURI)) - members.sort() - - for member in members: - subjects.append(member) - self._topLevels[member] = True - seen[member] = True - - recursable = [(isinstance(subject,BNode), self.refCount(subject), subject) for subject in self._subjects - if subject not in seen] - - recursable.sort() - subjects.extend([subject for 
(isbnode, refs, subject) in recursable]) - - return subjects - - def preprocess(self): - for triple in self.store.triples((None,None,None)): - self.preprocessTriple(triple) - - def preprocessTriple(self, (s,p,o)): - references = self.refCount(o) + 1 - self._references[o] = references - self._subjects[s] = True - - def refCount(self, node): - """Return the number of times this node has been referenced in the object position""" - return self._references.get(node, 0) - - def reset(self): - self.depth = 0 - self.lists = {} - self.namespaces = {} - self._references = {} - self._serialized = {} - self._subjects = {} - self._topLevels = {} - - def buildPredicateHash(self, subject): - """Build a hash key by predicate to a list of objects for the given subject""" - properties = {} - for s,p,o in self.store.triples((subject, None, None)): - oList = properties.get(p, []) - oList.append(o) - properties[p] = oList - return properties - - def sortProperties(self, properties): - """Take a hash from predicate uris to lists of values. - Sort the lists of values. 
Return a sorted list of properties.""" - # Sort object lists - for prop, objects in properties.items(): - objects.sort() - - # Make sorted list of properties - propList = [] - seen = {} - for prop in self.predicateOrder: - if (prop in properties) and (prop not in seen): - propList.append(prop) - seen[prop] = True - props = properties.keys() - props.sort() - for prop in props: - if prop not in seen: - propList.append(prop) - seen[prop] = True - return propList - - def subjectDone(self, subject): - """Mark a subject as done.""" - self._serialized[subject] = True - - def indent(self, modifier=0): - """Returns indent string multiplied by the depth""" - return (self.depth+modifier)*self.indentString - - def write(self, text): - """Write text in given encoding.""" - self.stream.write(text.encode(self.encoding, 'replace')) - - -SUBJECT = 0 -VERB = 1 -OBJECT = 2 - -_GEN_QNAME_FOR_DT = False -_SPACIOUS_OUTPUT = False - - -class TurtleSerializer(RecursiveSerializer): - - short_name = "turtle" - indentString = ' ' - - def __init__(self, store): - super(TurtleSerializer, self).__init__(store) - self.keywords = { - RDF.type: 'a' - } - self.reset() - self.stream = None - self._spacious = _SPACIOUS_OUTPUT - - def reset(self): - super(TurtleSerializer, self).reset() - self._shortNames = {} - self._started = False - - def serialize(self, stream, base=None, encoding=None, spacious=None, **args): - self.reset() - self.stream = stream - self.base = base - - if spacious is not None: - self._spacious = spacious - # In newer rdflibs these are always in the namespace manager - #self.store.prefix_mapping('rdf', RDFNS) - #self.store.prefix_mapping('rdfs', RDFSNS) - - self.preprocess() - subjects_list = self.orderSubjects() - - self.startDocument() - - firstTime = True - for subject in subjects_list: - if self.isDone(subject): - continue - if firstTime: - firstTime = False - if self.statement(subject) and not firstTime: - self.write('\n') - - self.endDocument() - 
stream.write(u"\n".encode('ascii')) - - def preprocessTriple(self, triple): - super(TurtleSerializer, self).preprocessTriple(triple) - for i, node in enumerate(triple): - if node in self.keywords: - continue - # Don't use generated prefixes for subjects and objects - self.getQName(node, gen_prefix=(i==VERB)) - if isinstance(node, Literal) and node.datatype: - self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) - p = triple[1] - if isinstance(p, BNode): - self._references[p] = self.refCount(p) + 1 - - def getQName(self, uri, gen_prefix=True): - if not isinstance(uri, URIRef): - return None - - parts=None - - try: - parts = self.store.compute_qname(uri, generate=gen_prefix) - except: - - # is the uri a namespace in itself? - pfx = self.store.store.prefix(uri) - - if pfx is not None: - parts = (pfx, uri, '') - else: - # nothing worked - return None - - prefix, namespace, local = parts - # Local parts with '.' will mess up serialization - if '.' in local: - return None - self.addNamespace(prefix, namespace) - return u'%s:%s' % (prefix, local) - - def startDocument(self): - self._started = True - ns_list = sorted(self.namespaces.items()) - for prefix, uri in ns_list: - self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri)) - if ns_list and self._spacious: - self.write('\n') - - def endDocument(self): - if self._spacious: - self.write('\n') - - def statement(self, subject): - self.subjectDone(subject) - return self.s_squared(subject) or self.s_default(subject) - - def s_default(self, subject): - self.write('\n'+self.indent()) - self.path(subject, SUBJECT) - self.predicateList(subject) - self.write(' .') - return True - - def s_squared(self, subject): - if (self.refCount(subject) > 0) or not isinstance(subject, BNode): - return False - self.write('\n'+self.indent()+'[]') - #self.depth+=1 - self.predicateList(subject) - #self.depth-=1 - self.write(' .') - return True - - def path(self, node, position, newline=False): - if not (self.p_squared(node, 
position, newline) - or self.p_default(node, position, newline)): - raise Error("Cannot serialize node '%s'"%(node, )) - - def p_default(self, node, position, newline=False): - if position != SUBJECT and not newline: - self.write(' ') - self.write(self.label(node, position)) - return True - - def label(self, node, position): - if node == RDF.nil: - return '()' - if position is VERB and node in self.keywords: - return self.keywords[node] - if isinstance(node, Literal): - return node._literal_n3(use_plain=True, - qname_callback=lambda dt: - self.getQName(dt, _GEN_QNAME_FOR_DT)) - else: - return self.getQName(node, position==VERB) or node.n3() - - def p_squared(self, node, position, newline=False): - if (not isinstance(node, BNode) - or node in self._serialized - or self.refCount(node) > 1 - or position == SUBJECT): - return False - - if not newline: - self.write(' ') - - if self.isValidList(node): - # this is a list - self.write('(') - self.depth += 1#2 - self.doList(node) - self.depth -= 1#2 - self.write(' )') - else: - self.subjectDone(node) - self.depth += 2 - #self.write('[\n' + self.indent()) - self.write('[') - self.depth -= 1 - #self.predicateList(node, newline=True) - self.predicateList(node, newline=False) - #self.write('\n' + self.indent() + ']') - self.write(' ]') - self.depth -= 1 - - return True - - def isValidList(self, l): - """ - Checks if l is a valid RDF list, i.e. no nodes have other properties. 
- """ - try: - if not self.store.value(l, RDF.first): - return False - except: - return False - while l: - if l != RDF.nil and len( - list(self.store.predicate_objects(l))) != 2: - return False - l = self.store.value(l, RDF.rest) - return True - - def doList(self,l): - while l: - item = self.store.value(l, RDF.first) - if item: - self.path(item, OBJECT) - self.subjectDone(l) - l = self.store.value(l, RDF.rest) - - def predicateList(self, subject, newline=False): - properties = self.buildPredicateHash(subject) - propList = self.sortProperties(properties) - if len(propList) == 0: - return - self.verb(propList[0], newline=newline) - self.objectList(properties[propList[0]]) - for predicate in propList[1:]: - self.write(';\n' + self.indent(1)) - self.verb(predicate, newline=True) - self.objectList(properties[predicate]) - - def verb(self, node, newline=False): - self.path(node, VERB, newline) - - def objectList(self, objects): - count = len(objects) - if count == 0: - return - depthmod = (count == 1) and 0 or 1 - self.depth += depthmod - self.path(objects[0], OBJECT) - for obj in objects[1:]: - self.write(',\n' + self.indent(1)) - self.path(obj, OBJECT, newline=True) - self.depth -= depthmod - - diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py b/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py deleted file mode 100644 index d36af4b..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py +++ /dev/null @@ -1,103 +0,0 @@ -import codecs -from xml.sax.saxutils import quoteattr, escape - -__all__ = ['XMLWriter'] - -class XMLWriter(object): - def __init__(self, stream, namespace_manager, encoding=None, decl=1, extra_ns={}): - encoding = encoding or 'utf-8' - encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding) - self.stream = stream = stream_writer(stream) - if decl: - stream.write('<?xml version="1.0" encoding="%s"?>' % encoding) - self.element_stack = [] - self.nm = 
namespace_manager - self.extra_ns=extra_ns - self.closed = True - - def __get_indent(self): - return " " * len(self.element_stack) - indent = property(__get_indent) - - def __close_start_tag(self): - if not self.closed: # TODO: - self.closed = True - self.stream.write(">") - - def push(self, uri): - self.__close_start_tag() - write = self.stream.write - write("\n") - write(self.indent) - write("<%s" % self.qname(uri)) - self.element_stack.append(uri) - self.closed = False - self.parent = False - - def pop(self, uri=None): - top = self.element_stack.pop() - if uri: - assert uri == top - write = self.stream.write - if not self.closed: - self.closed = True - write("/>") - else: - if self.parent: - write("\n") - write(self.indent) - write("</%s>" % self.qname(top)) - self.parent = True - - def element(self, uri, content, attributes={}): - """Utility method for adding a complete simple element""" - self.push(uri) - for k, v in attributes.iteritems(): - self.attribute(k,v) - self.text(content) - self.pop() - - def namespaces(self, namespaces=None): - if not namespaces: - namespaces=self.nm.namespaces() - - write = self.stream.write - write("\n") - for prefix, namespace in namespaces: - if prefix: - write(' xmlns:%s="%s"\n' % (prefix, namespace)) - else: - write(' xmlns="%s"\n' % namespace) - - for prefix, namespace in self.extra_ns.items(): - if prefix: - write(' xmlns:%s="%s"\n' % (prefix, namespace)) - else: - write(' xmlns="%s"\n' % namespace) - - - def attribute(self, uri, value): - write = self.stream.write - write(" %s=%s" % (self.qname(uri), quoteattr(value))) - - def text(self, text): - self.__close_start_tag() - if "<" in text and ">" in text and not "]]>" in text: - self.stream.write("<![CDATA[") - self.stream.write(text) - self.stream.write("]]>") - else: - self.stream.write(escape(text)) - - def qname(self,uri): - """Compute qname for a uri using our extra namespaces, - or the given namespace manager""" - - for pre,ns in self.extra_ns.items(): - if 
uri.startswith(ns): - if pre!="": - return ":".join(pre,uri[len(ns):]) - else: - return uri[len(ns):] - - return self.nm.qname(uri) diff --git a/creactistore/_templates/lib/rdflib/plugins/sleepycat.py b/creactistore/_templates/lib/rdflib/plugins/sleepycat.py deleted file mode 100644 index 67fcc17..0000000 --- a/creactistore/_templates/lib/rdflib/plugins/sleepycat.py +++ /dev/null @@ -1,531 +0,0 @@ -from rdflib.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN -from rdflib.term import URIRef -from rdflib.py3compat import b -def bb(u): return u.encode('utf-8') - -try: - from bsddb import db - has_bsddb = True -except ImportError: - try: - from bsddb3 import db - has_bsddb = True - except ImportError: - has_bsddb = False -from os import mkdir -from os.path import exists, abspath -from urllib import pathname2url -from threading import Thread - -import logging -_logger = logging.getLogger(__name__) - -__all__ = ['Sleepycat'] - -class Sleepycat(Store): - context_aware = True - formula_aware = True - transaction_aware = False - db_env = None - - def __init__(self, configuration=None, identifier=None): - if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.") - self.__open = False - self.__identifier = identifier - super(Sleepycat, self).__init__(configuration) - self._loads = self.node_pickler.loads - self._dumps = self.node_pickler.dumps - - def __get_identifier(self): - return self.__identifier - identifier = property(__get_identifier) - - def _init_db_environment(self, homeDir, create=True): - envsetflags = db.DB_CDB_ALLDB - envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD - if not exists(homeDir): - if create==True: - mkdir(homeDir) # TODO: implement create method and refactor this to it - self.create(homeDir) - else: - return NO_STORE - db_env = db.DBEnv() - db_env.set_cachesize(0, 1024*1024*50) # TODO - #db_env.set_lg_max(1024*1024) - db_env.set_flags(envsetflags, 1) - db_env.open(homeDir, envflags | 
db.DB_CREATE) - return db_env - - def is_open(self): - return self.__open - - def open(self, path, create=True): - if not has_bsddb: return NO_STORE - homeDir = path - - if self.__identifier is None: - self.__identifier = URIRef(pathname2url(abspath(homeDir))) - - db_env = self._init_db_environment(homeDir, create) - if db_env == NO_STORE: - return NO_STORE - self.db_env = db_env - self.__open = True - - dbname = None - dbtype = db.DB_BTREE - # auto-commit ensures that the open-call commits when transactions are enabled - dbopenflags = db.DB_THREAD - if self.transaction_aware == True: - dbopenflags |= db.DB_AUTO_COMMIT - - dbmode = 0660 - dbsetflags = 0 - - # create and open the DBs - self.__indicies = [None,] * 3 - self.__indicies_info = [None,] * 3 - for i in xrange(0, 3): - index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode() - index = db.DB(db_env) - index.set_flags(dbsetflags) - index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) - self.__indicies[i] = index - self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i)) - - lookup = {} - for i in xrange(0, 8): - results = [] - for start in xrange(0, 3): - score = 1 - len = 0 - for j in xrange(start, start+3): - if i & (1<<(j%3)): - score = score << 1 - len += 1 - else: - break - tie_break = 2-start - results.append(((score, tie_break), start, len)) - - results.sort() - score, start, len = results[-1] - - def get_prefix_func(start, end): - def get_prefix(triple, context): - if context is None: - yield "" - else: - yield context - i = start - while i<end: - yield triple[i%3] - i += 1 - yield "" - return get_prefix - - lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string)) - - - self.__lookup_dict = lookup - - self.__contexts = db.DB(db_env) - self.__contexts.set_flags(dbsetflags) - self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) - - 
self.__namespace = db.DB(db_env) - self.__namespace.set_flags(dbsetflags) - self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) - - self.__prefix = db.DB(db_env) - self.__prefix.set_flags(dbsetflags) - self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) - - self.__k2i = db.DB(db_env) - self.__k2i.set_flags(dbsetflags) - self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode) - - self.__i2k = db.DB(db_env) - self.__i2k.set_flags(dbsetflags) - self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode) - - self.__needs_sync = False - t = Thread(target=self.__sync_run) - t.setDaemon(True) - t.start() - self.__sync_thread = t - return VALID_STORE - - - def __sync_run(self): - from time import sleep, time - try: - min_seconds, max_seconds = 10, 300 - while self.__open: - if self.__needs_sync: - t0 = t1 = time() - self.__needs_sync = False - while self.__open: - sleep(.1) - if self.__needs_sync: - t1 = time() - self.__needs_sync = False - if time()-t1 > min_seconds or time()-t0 > max_seconds: - self.__needs_sync = False - _logger.debug("sync") - self.sync() - break - else: - sleep(1) - except Exception, e: - _logger.exception(e) - - def sync(self): - if self.__open: - for i in self.__indicies: - i.sync() - self.__contexts.sync() - self.__namespace.sync() - self.__prefix.sync() - self.__i2k.sync() - self.__k2i.sync() - - def close(self, commit_pending_transaction=False): - self.__open = False - self.__sync_thread.join() - for i in self.__indicies: - i.close() - self.__contexts.close() - self.__namespace.close() - self.__prefix.close() - self.__i2k.close() - self.__k2i.close() - self.db_env.close() - - def add(self, (subject, predicate, object), context, quoted=False, txn=None): - """\ - Add a triple to the store of triples. - """ - assert self.__open, "The Store must be open." 
- assert context!=self, "Can not add triple directly to store" - Store.add(self, (subject, predicate, object), context, quoted) - - _to_string = self._to_string - - s = _to_string(subject, txn=txn) - p = _to_string(predicate, txn=txn) - o = _to_string(object, txn=txn) - c = _to_string(context, txn=txn) - - cspo, cpos, cosp = self.__indicies - - value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) - if value is None: - self.__contexts.put(bb(c), "", txn=txn) - - contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) - contexts.add(bb(c)) - contexts_value = b("^").join(contexts) - assert contexts_value!=None - - cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn) - cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn) - cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn) - if not quoted: - cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn) - cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn) - cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn) - - self.__needs_sync = True - - def __remove(self, (s, p, o), c, quoted=False, txn=None): - cspo, cpos, cosp = self.__indicies - contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) - contexts.discard(c) - contexts_value = b("^").join(contexts) - for i, _to_key, _from_key in self.__indicies_info: - i.delete(_to_key((s, p, o), c), txn=txn) - if not quoted: - if contexts_value: - for i, _to_key, _from_key in self.__indicies_info: - i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn) - else: - for i, _to_key, _from_key in self.__indicies_info: - try: - i.delete(_to_key((s, p, o), b("")), txn=txn) - except db.DBNotFoundError, e: - pass # TODO: is it okay to ignore these? - - def remove(self, (subject, predicate, object), context, txn=None): - assert self.__open, "The Store must be open." 
- Store.remove(self, (subject, predicate, object), context) - _to_string = self._to_string - - if context is not None: - if context == self: - context = None - - if subject is not None and predicate is not None and object is not None and context is not None: - s = _to_string(subject, txn=txn) - p = _to_string(predicate, txn=txn) - o = _to_string(object, txn=txn) - c = _to_string(context, txn=txn) - value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) - if value is not None: - self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn) - self.__needs_sync = True - else: - cspo, cpos, cosp = self.__indicies - index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) - - cursor = index.cursor(txn=txn) - try: - current = cursor.set_range(prefix) - needs_sync = True - except db.DBNotFoundError: - current = None - needs_sync = False - cursor.close() - while current: - key, value = current - cursor = index.cursor(txn=txn) - try: - cursor.set_range(key) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - if key.startswith(prefix): - c, s, p, o = from_key(key) - if context is None: - contexts_value = index.get(key, txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts - contexts.add(b("")) # and from the conjunctive index - for c in contexts: - for i, _to_key, _ in self.__indicies_info: - i.delete(_to_key((s, p, o), c), txn=txn) - else: - self.__remove((s, p, o), c, txn=txn) - else: - break - - if context is not None: - if subject is None and predicate is None and object is None: - # TODO: also if context becomes empty and not just on remove((None, None, None), c) - try: - self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn) - except db.DBNotFoundError, e: - pass - - self.__needs_sync = needs_sync - - def triples(self, (subject, predicate, 
object), context=None, txn=None): - """A generator over all the triples matching """ - assert self.__open, "The Store must be open." - - if context is not None: - if context == self: - context = None - - _from_string = self._from_string - index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) - - cursor = index.cursor(txn=txn) - try: - current = cursor.set_range(prefix) - except db.DBNotFoundError: - current = None - cursor.close() - while current: - key, value = current - cursor = index.cursor(txn=txn) - try: - cursor.set_range(key) - # Cheap hack so 2to3 doesn't convert to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - if key and key.startswith(prefix): - contexts_value = index.get(key, txn=txn) - yield results_from_key(key, subject, predicate, object, contexts_value) - else: - break - - def __len__(self, context=None): - assert self.__open, "The Store must be open." - if context is not None: - if context == self: - context = None - - if context is None: - prefix = b("^") - else: - prefix = bb("%s^" % self._to_string(context)) - - index = self.__indicies[0] - cursor = index.cursor() - current = cursor.set_range(prefix) - count = 0 - while current: - key, value = current - if key.startswith(prefix): - count +=1 - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - else: - break - cursor.close() - return count - - def bind(self, prefix, namespace): - prefix = prefix.encode("utf-8") - namespace = namespace.encode("utf-8") - bound_prefix = self.__prefix.get(namespace) - if bound_prefix: - self.__namespace.delete(bound_prefix) - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - prefix = prefix.encode("utf-8") - ns = self.__namespace.get(prefix, None) - if ns is not None: - return ns.decode('utf-8') - return None - - def prefix(self, namespace): - 
namespace = namespace.encode("utf-8") - prefix = self.__prefix.get(namespace, None) - if prefix is not None: - return prefix.decode('utf-8') - return None - - def namespaces(self): - cursor = self.__namespace.cursor() - results = [] - current = cursor.first() - while current: - prefix, namespace = current - results.append((prefix.decode('utf-8'), namespace.decode('utf-8'))) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - cursor.close() - for prefix, namespace in results: - yield prefix, URIRef(namespace) - - def contexts(self, triple=None): - _from_string = self._from_string - _to_string = self._to_string - - if triple: - s, p, o = triple - s = _to_string(s) - p = _to_string(p) - o = _to_string(o) - contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o))) - if contexts: - for c in contexts.split(b("^")): - if c: - yield _from_string(c) - else: - index = self.__contexts - cursor = index.cursor() - current = cursor.first() - cursor.close() - while current: - key, value = current - context = _from_string(key) - yield context - cursor = index.cursor() - try: - cursor.set_range(key) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - - def _from_string(self, i): - k = self.__i2k.get(int(i)) - return self._loads(k) - - def _to_string(self, term, txn=None): - k = self._dumps(term) - i = self.__k2i.get(k, txn=txn) - if i is None: - # weird behavoir from bsddb not taking a txn as a keyword argument - # for append - if self.transaction_aware: - i = "%s" % self.__i2k.append(k, txn) - else: - i = "%s" % self.__i2k.append(k) - - self.__k2i.put(k, i, txn=txn) - else: - i = i.decode() - return i - - def __lookup(self, (subject, predicate, object), context, txn=None): - _to_string = self._to_string - if context is not None: - context = _to_string(context, txn=txn) - i = 0 - if subject is not None: - i += 1 - subject = 
_to_string(subject, txn=txn) - if predicate is not None: - i += 2 - predicate = _to_string(predicate, txn=txn) - if object is not None: - i += 4 - object = _to_string(object, txn=txn) - index, prefix_func, from_key, results_from_key = self.__lookup_dict[i] - #print (subject, predicate, object), context, prefix_func, index #DEBUG - prefix = bb("^".join(prefix_func((subject, predicate, object), context))) - return index, prefix, from_key, results_from_key - - -def to_key_func(i): - def to_key(triple, context): - "Takes a string; returns key" - return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^ - return to_key - -def from_key_func(i): - def from_key(key): - "Takes a key; returns string" - parts = key.split(b("^")) - return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1] - return from_key - -def results_from_key_func(i, from_string): - def from_key(key, subject, predicate, object, contexts_value): - "Takes a key and subject, predicate, object; returns tuple for yield" - parts = key.split(b("^")) - if subject is None: - # TODO: i & 1: # dis assemble and/or measure to see which is faster - # subject is None or i & 1 - s = from_string(parts[(3-i+0)%3+1]) - else: - s = subject - if predicate is None:#i & 2: - p = from_string(parts[(3-i+1)%3+1]) - else: - p = predicate - if object is None:#i & 4: - o = from_string(parts[(3-i+2)%3+1]) - else: - o = object - return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c) - return from_key - -def readable_index(i): - s, p, o = "?" * 3 - if i & 1: s = "s" - if i & 2: p = "p" - if i & 4: o = "o" - return "%s,%s,%s" % (s, p, o) |