Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins')
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/__init__.py7
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/memory.py563
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py3
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py2314
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py107
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/nt.py28
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py243
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py168
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py36
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py180
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/options.py173
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/parse.py200
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/state.py434
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py0
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py27
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py579
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/trix.py286
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py0
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/n3.py123
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py34
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/nt.py76
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py282
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/trix.py72
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py364
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py103
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/sleepycat.py531
26 files changed, 0 insertions, 6933 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/__init__.py b/creactistore/_templates/lib/rdflib/plugins/__init__.py
deleted file mode 100644
index 4622bb0..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Default plugins for rdflib.
-
-This is a namespace package and contains the default plugins for
-rdflib.
-
-"""
diff --git a/creactistore/_templates/lib/rdflib/plugins/memory.py b/creactistore/_templates/lib/rdflib/plugins/memory.py
deleted file mode 100644
index 3a9d9f8..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/memory.py
+++ /dev/null
@@ -1,563 +0,0 @@
-from __future__ import generators
-from rdflib.term import BNode
-from rdflib.store import Store, NO_STORE, VALID_STORE
-
-__all__ = ['Memory', 'IOMemory']
-
-ANY = Any = None
-
-class Memory(Store):
- """\
- An in memory implementation of a triple store.
-
- This triple store uses nested dictionaries to store triples. Each
- triple is stored in two such indices as follows spo[s][p][o] = 1 and
- pos[p][o][s] = 1.
-
- Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser
- """
- def __init__(self, configuration=None, identifier=None):
- super(Memory, self).__init__(configuration)
- self.identifier = identifier
-
- # indexed by [subject][predicate][object]
- self.__spo = {}
-
- # indexed by [predicate][object][subject]
- self.__pos = {}
-
- # indexed by [predicate][object][subject]
- self.__osp = {}
-
- self.__namespace = {}
- self.__prefix = {}
-
- def add(self, (subject, predicate, object), context, quoted=False):
- """\
- Add a triple to the store of triples.
- """
- # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s]
- # = 1, creating the nested dictionaries where they do not yet
- # exits.
- spo = self.__spo
- try:
- po = spo[subject]
- except:
- po = spo[subject] = {}
- try:
- o = po[predicate]
- except:
- o = po[predicate] = {}
- o[object] = 1
-
- pos = self.__pos
- try:
- os = pos[predicate]
- except:
- os = pos[predicate] = {}
- try:
- s = os[object]
- except:
- s = os[object] = {}
- s[subject] = 1
-
- osp = self.__osp
- try:
- sp = osp[object]
- except:
- sp = osp[object] = {}
- try:
- p = sp[subject]
- except:
- p = sp[subject] = {}
- p[predicate] = 1
-
- def remove(self, (subject, predicate, object), context=None):
- for (subject, predicate, object), c in self.triples(
- (subject, predicate, object)):
- del self.__spo[subject][predicate][object]
- del self.__pos[predicate][object][subject]
- del self.__osp[object][subject][predicate]
-
- def triples(self, (subject, predicate, object), context=None):
- """A generator over all the triples matching """
- if subject!=ANY: # subject is given
- spo = self.__spo
- if subject in spo:
- subjectDictionary = spo[subject]
- if predicate!=ANY: # subject+predicate is given
- if predicate in subjectDictionary:
- if object!=ANY: # subject+predicate+object is given
- if object in subjectDictionary[predicate]:
- yield (subject, predicate, object), \
- self.__contexts()
- else: # given object not found
- pass
- else: # subject+predicate is given, object unbound
- for o in subjectDictionary[predicate].keys():
- yield (subject, predicate, o), \
- self.__contexts()
- else: # given predicate not found
- pass
- else: # subject given, predicate unbound
- for p in subjectDictionary.keys():
- if object!=ANY: # object is given
- if object in subjectDictionary[p]:
- yield (subject, p, object), self.__contexts()
- else: # given object not found
- pass
- else: # object unbound
- for o in subjectDictionary[p].keys():
- yield (subject, p, o), self.__contexts()
- else: # given subject not found
- pass
- elif predicate!=ANY: # predicate is given, subject unbound
- pos = self.__pos
- if predicate in pos:
- predicateDictionary = pos[predicate]
- if object!=ANY: # predicate+object is given, subject unbound
- if object in predicateDictionary:
- for s in predicateDictionary[object].keys():
- yield (s, predicate, object), self.__contexts()
- else: # given object not found
- pass
- else: # predicate is given, object+subject unbound
- for o in predicateDictionary.keys():
- for s in predicateDictionary[o].keys():
- yield (s, predicate, o), self.__contexts()
- elif object!=ANY: # object is given, subject+predicate unbound
- osp = self.__osp
- if object in osp:
- objectDictionary = osp[object]
- for s in objectDictionary.keys():
- for p in objectDictionary[s].keys():
- yield (s, p, object), self.__contexts()
- else: # subject+predicate+object unbound
- spo = self.__spo
- for s in spo.keys():
- subjectDictionary = spo[s]
- for p in subjectDictionary.keys():
- for o in subjectDictionary[p].keys():
- yield (s, p, o), self.__contexts()
-
- def __len__(self, context=None):
- #@@ optimize
- i = 0
- for triple in self.triples((None, None, None)):
- i += 1
- return i
-
- def bind(self, prefix, namespace):
- self.__prefix[namespace] = prefix
- self.__namespace[prefix] = namespace
-
- def namespace(self, prefix):
- return self.__namespace.get(prefix, None)
-
- def prefix(self, namespace):
- return self.__prefix.get(namespace, None)
-
- def namespaces(self):
- for prefix, namespace in self.__namespace.iteritems():
- yield prefix, namespace
-
- def __contexts(self):
- return (c for c in []) # TODO: best way to return empty generator
-
-class IOMemory(Store):
- """\
- An integer-key-optimized-context-aware-in-memory store.
-
- Uses nested dictionaries to store triples and context. Each triple
- is stored in six such indices as follows cspo[c][s][p][o] = 1
- and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as
- spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c]
-
- Context information is used to track the 'source' of the triple
- data for merging, unmerging, remerging purposes. context aware
- store stores consume more memory size than non context stores.
-
- """
-
- context_aware = True
- formula_aware = True
-
- def __init__(self, configuration=None, identifier=None):
- super(IOMemory, self).__init__()
-
- # indexed by [context][subject][predicate][object] = 1
- self.cspo = self.createIndex()
-
- # indexed by [context][predicate][object][subject] = 1
- self.cpos = self.createIndex()
-
- # indexed by [context][object][subject][predicate] = 1
- self.cosp = self.createIndex()
-
- # indexed by [subject][predicate][object] = [context]
- self.spo = self.createIndex()
-
- # indexed by [predicate][object][subject] = [context]
- self.pos = self.createIndex()
-
- # indexed by [object][subject][predicate] = [context]
- self.osp = self.createIndex()
-
- # indexes integer keys to identifiers
- self.forward = self.createForward()
-
- # reverse index of forward
- self.reverse = self.createReverse()
-
- self.identifier = identifier or BNode()
-
- self.__namespace = self.createPrefixMap()
- self.__prefix = self.createPrefixMap()
-
- def open(self, configuration, create=False):
- if not create:
- # An IOMemory Store never exists.
- return NO_STORE
- else:
- return VALID_STORE
-
- def bind(self, prefix, namespace):
- self.__prefix[namespace] = prefix
- self.__namespace[prefix] = namespace
-
- def namespace(self, prefix):
- return self.__namespace.get(prefix, None)
-
- def prefix(self, namespace):
- return self.__prefix.get(namespace, None)
-
- def namespaces(self):
- for prefix, namespace in self.__namespace.iteritems():
- yield prefix, namespace
-
- def defaultContext(self):
- return self.default_context
-
- def addContext(self, context):
- """ Add context w/o adding statement. Dan you can remove this if you want """
-
- if not self.reverse.has_key(context):
- ci=randid()
- while not self.forward.insert(ci, context):
- ci=randid()
- self.reverse[context] = ci
-
- def intToIdentifier(self, (si, pi, oi)):
- """ Resolve an integer triple into identifers. """
- return (self.forward[si], self.forward[pi], self.forward[oi])
-
- def identifierToInt(self, (s, p, o)):
- """ Resolve an identifier triple into integers. """
- return (self.reverse[s], self.reverse[p], self.reverse[o])
-
- def uniqueSubjects(self, context=None):
- if context is None:
- index = self.spo
- else:
- index = self.cspo[context]
- for si in index.keys():
- yield self.forward[si]
-
- def uniquePredicates(self, context=None):
- if context is None:
- index = self.pos
- else:
- index = self.cpos[context]
- for pi in index.keys():
- yield self.forward[pi]
-
- def uniqueObjects(self, context=None):
- if context is None:
- index = self.osp
- else:
- index = self.cosp[context]
- for oi in index.keys():
- yield self.forward[oi]
-
- def createForward(self):
- return {}
-
- def createReverse(self):
- return {}
-
- def createIndex(self):
- return {}
-
- def createPrefixMap(self):
- return {}
-
- def add(self, triple, context, quoted=False):
- """\
- Add a triple to the store.
- """
- Store.add(self, triple, context, quoted)
- for triple, cg in self.triples(triple, context):
- #triple is already in the store.
- return
-
- subject, predicate, object = triple
-
- f = self.forward
- r = self.reverse
-
- # assign keys for new identifiers
-
- if not r.has_key(subject):
- si=randid()
- while f.has_key(si):
- si=randid()
- f[si] = subject
- r[subject] = si
- else:
- si = r[subject]
-
- if not r.has_key(predicate):
- pi=randid()
- while f.has_key(pi):
- pi=randid()
- f[pi] = predicate
- r[predicate] = pi
- else:
- pi = r[predicate]
-
- if not r.has_key(object):
- oi=randid()
- while f.has_key(oi):
- oi=randid()
- f[oi] = object
- r[object] = oi
- else:
- oi = r[object]
-
- if not r.has_key(context):
- ci=randid()
- while f.has_key(ci):
- ci=randid()
- f[ci] = context
- r[context] = ci
- else:
- ci = r[context]
-
- # add dictionary entries for cspo[c][s][p][o] = 1,
- # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the
- # nested {} where they do not yet exits.
- self._setNestedIndex(self.cspo, ci, si, pi, oi)
- self._setNestedIndex(self.cpos, ci, pi, oi, si)
- self._setNestedIndex(self.cosp, ci, oi, si, pi)
-
- if not quoted:
- self._setNestedIndex(self.spo, si, pi, oi, ci)
- self._setNestedIndex(self.pos, pi, oi, si, ci)
- self._setNestedIndex(self.osp, oi, si, pi, ci)
-
- def _setNestedIndex(self, index, *keys):
- for key in keys[:-1]:
- if not index.has_key(key):
- index[key] = self.createIndex()
- index = index[key]
- index[keys[-1]] = 1
-
-
- def _removeNestedIndex(self, index, *keys):
- """ Remove context from the list of contexts in a nested index.
-
- Afterwards, recursively remove nested indexes when they became empty.
- """
- parents = []
- for key in keys[:-1]:
- parents.append(index)
- index = index[key]
- del index[keys[-1]]
-
- n = len(parents)
- for i in xrange(n):
- index = parents[n-1-i]
- key = keys[n-1-i]
- if len(index[key]) == 0:
- del index[key]
-
- def remove(self, triple, context=None):
- Store.remove(self, triple, context)
- if context is not None:
- if context == self:
- context = None
-
- f = self.forward
- r = self.reverse
- if context is None:
- for triple, cg in self.triples(triple):
- subject, predicate, object = triple
- si, pi, oi = self.identifierToInt((subject, predicate, object))
- contexts = list(self.contexts(triple))
- for context in contexts:
- ci = r[context]
- del self.cspo[ci][si][pi][oi]
- del self.cpos[ci][pi][oi][si]
- del self.cosp[ci][oi][si][pi]
-
- self._removeNestedIndex(self.spo, si, pi, oi, ci)
- self._removeNestedIndex(self.pos, pi, oi, si, ci)
- self._removeNestedIndex(self.osp, oi, si, pi, ci)
- # grr!! hafta ref-count these before you can collect them dumbass!
- #del f[si], f[pi], f[oi]
- #del r[subject], r[predicate], r[object]
- else:
- subject, predicate, object = triple
- ci = r.get(context, None)
- if ci:
- for triple, cg in self.triples(triple, context):
- si, pi, oi = self.identifierToInt(triple)
- del self.cspo[ci][si][pi][oi]
- del self.cpos[ci][pi][oi][si]
- del self.cosp[ci][oi][si][pi]
-
- try:
- self._removeNestedIndex(self.spo, si, pi, oi, ci)
- self._removeNestedIndex(self.pos, pi, oi, si, ci)
- self._removeNestedIndex(self.osp, oi, si, pi, ci)
- except KeyError:
- # the context may be a quoted one in which
- # there will not be a triple in spo, pos or
- # osp. So ignore any KeyErrors
- pass
- # TODO delete references to resources in self.forward/self.reverse
- # that are not in use anymore...
-
- if subject is None and predicate is None and object is None:
- # remove context
- try:
- ci = self.reverse[context]
- del self.cspo[ci], self.cpos[ci], self.cosp[ci]
- except KeyError:
- # TODO: no exception when removing non-existant context?
- pass
-
-
- def triples(self, triple, context=None):
- """A generator over all the triples matching """
-
- if context is not None:
- if context == self:
- context = None
-
- subject, predicate, object = triple
- ci = si = pi = oi = Any
-
- if context is None:
- spo = self.spo
- pos = self.pos
- osp = self.osp
- else:
- try:
- ci = self.reverse[context] # TODO: Really ignore keyerror here
- spo = self.cspo[ci]
- pos = self.cpos[ci]
- osp = self.cosp[ci]
- except KeyError:
- return
- try:
- if subject is not Any:
- si = self.reverse[subject] # throws keyerror if subject doesn't exist ;(
- if predicate is not Any:
- pi = self.reverse[predicate]
- if object is not Any:
- oi = self.reverse[object]
- except KeyError, e:
- return #raise StopIteration
-
- if si != Any: # subject is given
- if spo.has_key(si):
- subjectDictionary = spo[si]
- if pi != Any: # subject+predicate is given
- if subjectDictionary.has_key(pi):
- if oi!= Any: # subject+predicate+object is given
- if subjectDictionary[pi].has_key(oi):
- ss, pp, oo = self.intToIdentifier((si, pi, oi))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # given object not found
- pass
- else: # subject+predicate is given, object unbound
- for o in subjectDictionary[pi].keys():
- ss, pp, oo = self.intToIdentifier((si, pi, o))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # given predicate not found
- pass
- else: # subject given, predicate unbound
- for p in subjectDictionary.keys():
- if oi != Any: # object is given
- if subjectDictionary[p].has_key(oi):
- ss, pp, oo = self.intToIdentifier((si, p, oi))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # given object not found
- pass
- else: # object unbound
- for o in subjectDictionary[p].keys():
- ss, pp, oo = self.intToIdentifier((si, p, o))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # given subject not found
- pass
- elif pi != Any: # predicate is given, subject unbound
- if pos.has_key(pi):
- predicateDictionary = pos[pi]
- if oi != Any: # predicate+object is given, subject unbound
- if predicateDictionary.has_key(oi):
- for s in predicateDictionary[oi].keys():
- ss, pp, oo = self.intToIdentifier((s, pi, oi))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # given object not found
- pass
- else: # predicate is given, object+subject unbound
- for o in predicateDictionary.keys():
- for s in predicateDictionary[o].keys():
- ss, pp, oo = self.intToIdentifier((s, pi, o))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- elif oi != Any: # object is given, subject+predicate unbound
- if osp.has_key(oi):
- objectDictionary = osp[oi]
- for s in objectDictionary.keys():
- for p in objectDictionary[s].keys():
- ss, pp, oo = self.intToIdentifier((s, p, oi))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
- else: # subject+predicate+object unbound
- for s in spo.keys():
- subjectDictionary = spo[s]
- for p in subjectDictionary.keys():
- for o in subjectDictionary[p].keys():
- ss, pp, oo = self.intToIdentifier((s, p, o))
- yield (ss, pp, oo), (c for c in self.contexts((ss, pp, oo)))
-
- def __len__(self, context=None):
-
- if context is not None:
- if context == self:
- context = None
-
- # TODO: for eff. implementation
- count = 0
- for triple, cg in self.triples((Any, Any, Any), context):
- count += 1
- return count
-
- def contexts(self, triple=None):
- if triple:
- si, pi, oi = self.identifierToInt(triple)
- for ci in self.spo[si][pi][oi]:
- yield self.forward[ci]
- else:
- for ci in self.cspo.keys():
- yield self.forward[ci]
-
-
-
-
-import random
-
-def randid(randint=random.randint, choice=random.choice, signs=(-1,1)):
- return choice(signs)*randint(1,2000000000)
-
-del random
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py
deleted file mode 100644
index 8062daa..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-
-"""
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
deleted file mode 100644
index ac48340..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
+++ /dev/null
@@ -1,2314 +0,0 @@
-#!/usr/bin/env python
-u"""
-notation3.py - Standalone Notation3 Parser
-Derived from CWM, the Closed World Machine
-
-Authors of the original suite:
-
-* Dan Connolly <@@>
-* Tim Berners-Lee <@@>
-* Yosi Scharf <@@>
-* Joseph M. Reagle Jr. <reagle@w3.org>
-* Rich Salz <rsalz@zolera.com>
-
-http://www.w3.org/2000/10/swap/notation3.py
-
-Copyright 2000-2007, World Wide Web Consortium.
-Copyright 2001, MIT.
-Copyright 2001, Zolera Systems Inc.
-
-License: W3C Software License
-http://www.w3.org/Consortium/Legal/copyright-software
-
-Modified by Sean B. Palmer
-Copyright 2007, Sean B. Palmer. \u32E1
-
-Modified to work with rdflib by Gunnar Aastrand Grimnes
-Copyright 2010, Gunnar A. Grimnes
-
-"""
-
-# Python standard libraries
-import types
-import sys
-import os
-import string
-import re
-import time
-import StringIO
-import codecs
-
-from binascii import a2b_hex
-from decimal import Decimal
-
-from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
-from rdflib.graph import QuotedGraph, ConjunctiveGraph
-from rdflib import py3compat
-b = py3compat.b
-
-__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"]
-
-from rdflib.parser import Parser
-
-# Incestuous.. would be nice to separate N3 and XML
-# from sax2rdf import XMLtoDOM
-def XMLtoDOM(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-
-# SWAP http://www.w3.org/2000/10/swap
-# from diag import verbosity, setVerbosity, progress
-def verbosity(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-def setVerbosity(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-def progress(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-
-
-
-def splitFrag(uriref):
- """split a URI reference between the fragment and the rest.
-
- Punctuation is thrown away.
-
- e.g.
-
- >>> splitFrag("abc#def")
- ('abc', 'def')
-
- >>> splitFrag("abcdef")
- ('abcdef', None)
-
- """
-
- i = uriref.rfind("#")
- if i>= 0: return uriref[:i], uriref[i+1:]
- else: return uriref, None
-
-def splitFragP(uriref, punct=0):
- """split a URI reference before the fragment
-
- Punctuation is kept.
-
- e.g.
-
- >>> splitFragP("abc#def")
- ('abc', '#def')
-
- >>> splitFragP("abcdef")
- ('abcdef', '')
-
- """
-
- i = uriref.rfind("#")
- if i>= 0: return uriref[:i], uriref[i:]
- else: return uriref, ''
-
-@py3compat.format_doctest_out
-def join(here, there):
- """join an absolute URI and URI reference
- (non-ascii characters are supported/doctested;
- haven't checked the details of the IRI spec though)
-
- here is assumed to be absolute.
- there is URI reference.
-
- >>> join('http://example/x/y/z', '../abc')
- 'http://example/x/abc'
-
- Raise ValueError if there uses relative path
- syntax but here has no hierarchical path.
-
- >>> join('mid:foo@example', '../foo')
- Traceback (most recent call last):
- raise ValueError, here
- ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.
-
- >>> join('http://example/x/y/z', '')
- 'http://example/x/y/z'
-
- >>> join('mid:foo@example', '#foo')
- 'mid:foo@example#foo'
-
- We grok IRIs
-
- >>> len(u'Andr\\xe9')
- 5
-
- >>> join('http://example.org/', u'#Andr\\xe9')
- %(u)s'http://example.org/#Andr\\xe9'
- """
-
- assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?)
-
- slashl = there.find('/')
- colonl = there.find(':')
-
- # join(base, 'foo:/') -- absolute
- if colonl >= 0 and (slashl < 0 or colonl < slashl):
- return there
-
- bcolonl = here.find(':')
- assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute
-
- path, frag = splitFragP(there)
- if not path: return here + frag
-
- # join('mid:foo@example', '../foo') bzzt
- if here[bcolonl+1:bcolonl+2] <> '/':
- raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." %(here, there))
-
- if here[bcolonl+1:bcolonl+3] == '//':
- bpath = here.find('/', bcolonl+3)
- else:
- bpath = bcolonl+1
-
- # join('http://xyz', 'foo')
- if bpath < 0:
- bpath = len(here)
- here = here + '/'
-
- # join('http://xyz/', '//abc') => 'http://abc'
- if there[:2] == '//':
- return here[:bcolonl+1] + there
-
- # join('http://xyz/', '/abc') => 'http://xyz/abc'
- if there[:1] == '/':
- return here[:bpath] + there
-
- slashr = here.rfind('/')
-
- while 1:
- if path[:2] == './':
- path = path[2:]
- if path == '.':
- path = ''
- elif path[:3] == '../' or path == '..':
- path = path[3:]
- i = here.rfind('/', bpath, slashr)
- if i >= 0:
- here = here[:i+1]
- slashr = i
- else:
- break
-
- return here[:slashr+1] + path + frag
-
-commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')
-
-def refTo(base, uri):
- """figure out a relative URI reference from base to uri
-
- >>> refTo('http://example/x/y/z', 'http://example/x/abc')
- '../abc'
-
- >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
- 'q/r#s'
-
- >>> refTo(None, 'http://ex/x/y')
- 'http://ex/x/y'
-
- >>> refTo('http://ex/x/y', 'http://ex/x/y')
- ''
-
- Note the relationship between refTo and join:
- join(x, refTo(x, y)) == y
- which points out certain strings which cannot be URIs. e.g.
- >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
- 0
-
- So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
- >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
- 1
-
- This one checks that it uses a root-realtive one where that is
- all they share. Now uses root-relative where no path is shared.
- This is a matter of taste but tends to give more resilience IMHO
- -- and shorter paths
-
- Note that base may be None, meaning no base. In some situations, there
- just ain't a base. Slife. In these cases, relTo returns the absolute value.
- The axiom abs(,rel(b,x))=x still holds.
- This saves people having to set the base to "bogus:".
-
- >>> refTo('http://ex/x/y/z', 'http://ex/r')
- '/r'
-
- """
-
-# assert base # don't mask bugs -danc # not a bug. -tim
- if not base: return uri
- if base == uri: return ""
-
- # Find how many path segments in common
- i=0
- while i<len(uri) and i<len(base):
- if uri[i] == base[i]: i = i + 1
- else: break
- # print "# relative", base, uri, " same up to ", i
- # i point to end of shortest one or first difference
-
- m = commonHost.match(base[:i])
- if m:
- k=uri.find("//")
- if k<0: k=-2 # no host
- l=uri.find("/", k+2)
- if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]:
- return uri[l:]
-
- if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base
-
- while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash
-
- if i < 3: return uri # No way.
- if base.find("//", i-2)>0 \
- or uri.find("//", i-2)>0: return uri # An unshared "//"
- if base.find(":", i)>0: return uri # An unshared ":"
- n = base.count("/", i)
- if n == 0 and i<len(uri) and uri[i] == '#':
- return "./" + uri[i:]
- elif n == 0 and i == len(uri):
- return "./"
- else:
- return ("../" * n) + uri[i:]
-
-
-def base():
- """The base URI for this process - the Web equiv of cwd
-
- Relative or abolute unix-standard filenames parsed relative to
- this yeild the URI of the file.
- If we had a reliable way of getting a computer name,
- we should put it in the hostname just to prevent ambiguity
-
- """
-# return "file://" + hostname + os.getcwd() + "/"
- return "file://" + _fixslash(os.getcwd()) + "/"
-
-
-def _fixslash(str):
- """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
- s = str
- for i in range(len(s)):
- if s[i] == "\\": s = s[:i] + "/" + s[i+1:]
- if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present
- return s
-
-URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
- # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-
-@py3compat.format_doctest_out
-def canonical(str_in):
- """Convert equivalent URIs (or parts) to the same string
-
- There are many differenet levels of URI canonicalization
- which are possible. See http://www.ietf.org/rfc/rfc3986.txt
- Done:
- - Converfting unicode IRI to utf-8
- - Escaping all non-ASCII
- - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
- hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
- - Making all escapes uppercase hexadecimal
-
- Not done:
- - Making URI scheme lowercase
- - changing /./ or /foo/../ to / with care not to change host part
-
-
- >>> canonical("foo bar")
- %(b)s'foo%%20bar'
-
- >>> canonical(u'http:')
- %(b)s'http:'
-
- >>> canonical('fran%%c3%%83%%c2%%a7ois')
- %(b)s'fran%%C3%%83%%C2%%A7ois'
-
- >>> canonical('a')
- %(b)s'a'
-
- >>> canonical('%%4e')
- %(b)s'N'
-
- >>> canonical('%%9d')
- %(b)s'%%9D'
-
- >>> canonical('%%2f')
- %(b)s'%%2F'
-
- >>> canonical('%%2F')
- %(b)s'%%2F'
-
- """
- if type(str_in) == type(u''):
- s8 = str_in.encode('utf-8')
- else:
- s8 = str_in
- s = b('')
- i = 0
- while i < len(s8):
- if py3compat.PY3:
- n = s8[i]; ch = bytes([n])
- else:
- ch = s8[i]; n = ord(ch)
- if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
- s += b("%%%02X" % ord(ch))
- elif ch == b('%') and i+2 < len(s8):
- ch2 = a2b_hex(s8[i+1:i+3])
- if ch2 in URI_unreserved: s += ch2
- else: s += b("%%%02X" % ord(ch2))
- i = i+3
- continue
- else:
- s += ch
- i = i +1
- return s
-
-
-
-
-
-
-CONTEXT = 0
-PRED = 1
-SUBJ = 2
-OBJ = 3
-
-PARTS = PRED, SUBJ, OBJ
-ALL4 = CONTEXT, PRED, SUBJ, OBJ
-
-SYMBOL = 0
-FORMULA = 1
-LITERAL = 2
-LITERAL_DT = 21
-LITERAL_LANG = 22
-ANONYMOUS = 3
-XMLLITERAL = 25
-
-Logic_NS = "http://www.w3.org/2000/10/swap/log#"
-NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging
-forSomeSym = Logic_NS + "forSome"
-forAllSym = Logic_NS + "forAll"
-
-RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
-RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-OWL_NS = "http://www.w3.org/2002/07/owl#"
-DAML_sameAs_URI = OWL_NS+"sameAs"
-parsesTo_URI = Logic_NS + "parsesTo"
-RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"
-
-List_NS = RDF_NS_URI # From 20030808
-_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"
-
-N3_first = (SYMBOL, List_NS + "first")
-N3_rest = (SYMBOL, List_NS + "rest")
-N3_li = (SYMBOL, List_NS + "li")
-N3_nil = (SYMBOL, List_NS + "nil")
-N3_List = (SYMBOL, List_NS + "List")
-N3_Empty = (SYMBOL, List_NS + "Empty")
-
-
-
-runNamespaceValue = None
-
-def runNamespace():
- "Return a URI suitable as a namespace for run-local objects"
- # @@@ include hostname (privacy?) (hash it?)
- global runNamespaceValue
- if runNamespaceValue == None:
- runNamespaceValue = join(base(), _unique_id()) + '#'
- return runNamespaceValue
-
-nextu = 0
-def uniqueURI():
- "A unique URI"
- global nextu
- nextu += 1
- return runNamespace() + "u_" + `nextu`
-
-class URISyntaxError(ValueError):
- """A parameter is passed to a routine that requires a URI reference"""
- pass
-
-
-tracking = False
-chatty_flag = 50
-
-
-from xml.dom import Node
-try:
- from xml.ns import XMLNS
-except:
- class XMLNS:
- BASE = "http://www.w3.org/2000/xmlns/"
- XML = "http://www.w3.org/XML/1998/namespace"
-
-
-_attrs = lambda E: (E.attributes and E.attributes.values()) or []
-_children = lambda E: E.childNodes or []
-_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
-_inclusive = lambda n: n.unsuppressedPrefixes == None
-
-# Does a document/PI has lesser/greater document order than the
-# first element?
-_LesserElement, _Element, _GreaterElement = range(3)
-
-def _sorter(n1,n2):
- '''_sorter(n1,n2) -> int
- Sorting predicate for non-NS attributes.'''
-
- i = cmp(n1.namespaceURI, n2.namespaceURI)
- if i: return i
- return cmp(n1.localName, n2.localName)
-
-
-def _sorter_ns(n1,n2):
- '''_sorter_ns((n,v),(n,v)) -> int
- "(an empty namespace URI is lexicographically least)."'''
-
- if n1[0] == 'xmlns': return -1
- if n2[0] == 'xmlns': return 1
- return cmp(n1[0], n2[0])
-
-def _utilized(n, node, other_attrs, unsuppressedPrefixes):
- '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
- Return true if that nodespace is utilized within the node'''
-
- if n.startswith('xmlns:'):
- n = n[6:]
- elif n.startswith('xmlns'):
- n = n[5:]
- if (n=="" and node.prefix in ["#default", None]) or \
- n == node.prefix or n in unsuppressedPrefixes:
- return 1
- for attr in other_attrs:
- if n == attr.prefix: return 1
- return 0
-
-#_in_subset = lambda subset, node: not subset or node in subset
-_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak
-
-class _implementation:
- '''Implementation class for C14N. This accompanies a node during it's
- processing and includes the parameters and processing state.'''
-
- # Handler for each node type; populated during module instantiation.
- handlers = {}
-
- def __init__(self, node, write, **kw):
- '''Create and run the implementation.'''
- self.write = write
- self.subset = kw.get('subset')
- self.comments = kw.get('comments', 0)
- self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
- nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
-
- # Processing state.
- self.state = (nsdict, {'xml':''}, {}) #0422
-
- if node.nodeType == Node.DOCUMENT_NODE:
- self._do_document(node)
- elif node.nodeType == Node.ELEMENT_NODE:
- self.documentOrder = _Element # At document element
- if not _inclusive(self):
- self._do_element(node)
- else:
- inherited = self._inherit_context(node)
- self._do_element(node, inherited)
- elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
- pass
- elif node.nodeType == Node.TEXT_NODE:
- self._do_text(node)
- else:
- raise TypeError, str(node)
-
-
- def _inherit_context(self, node):
- '''_inherit_context(self, node) -> list
- Scan ancestors of attribute and namespace context. Used only
- for single element node canonicalization, not for subset
- canonicalization.'''
-
- # Collect the initial list of xml:foo attributes.
- xmlattrs = filter(_IN_XML_NS, _attrs(node))
-
- # Walk up and get all xml:XXX attributes we inherit.
- inherited, parent = [], node.parentNode
- while parent and parent.nodeType == Node.ELEMENT_NODE:
- for a in filter(_IN_XML_NS, _attrs(parent)):
- n = a.localName
- if n not in xmlattrs:
- xmlattrs.append(n)
- inherited.append(a)
- parent = parent.parentNode
- return inherited
-
-
- def _do_document(self, node):
- '''_do_document(self, node) -> None
- Process a document node. documentOrder holds whether the document
- element has been encountered such that PIs/comments can be written
- as specified.'''
-
- self.documentOrder = _LesserElement
- for child in node.childNodes:
- if child.nodeType == Node.ELEMENT_NODE:
- self.documentOrder = _Element # At document element
- self._do_element(child)
- self.documentOrder = _GreaterElement # After document element
- elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
- self._do_pi(child)
- elif child.nodeType == Node.COMMENT_NODE:
- self._do_comment(child)
- elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
- pass
- else:
- raise TypeError, str(child)
- handlers[Node.DOCUMENT_NODE] = _do_document
-
-
- def _do_text(self, node):
- '''_do_text(self, node) -> None
- Process a text or CDATA node. Render various special characters
- as their C14N entity representations.'''
- if not _in_subset(self.subset, node): return
- s = node.data.replace("&", "&amp;")
- s = s.replace("<", "&lt;")
- s = s.replace(">", "&gt;")
- s = s.replace("\015", "&#xD;")
- if s: self.write(s)
- handlers[Node.TEXT_NODE] = _do_text
- handlers[Node.CDATA_SECTION_NODE] = _do_text
-
-
- def _do_pi(self, node):
- '''_do_pi(self, node) -> None
- Process a PI node. Render a leading or trailing #xA if the
- document order of the PI is greater or lesser (respectively)
- than the document element.
- '''
- if not _in_subset(self.subset, node): return
- W = self.write
- if self.documentOrder == _GreaterElement: W('\n')
- W('<?')
- W(node.nodeName)
- s = node.data
- if s:
- W(' ')
- W(s)
- W('?>')
- if self.documentOrder == _LesserElement: W('\n')
- handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
-
-
- def _do_comment(self, node):
- '''_do_comment(self, node) -> None
- Process a comment node. Render a leading or trailing #xA if the
- document order of the comment is greater or lesser (respectively)
- than the document element.
- '''
- if not _in_subset(self.subset, node): return
- if self.comments:
- W = self.write
- if self.documentOrder == _GreaterElement: W('\n')
- W('<!--')
- W(node.data)
- W('-->')
- if self.documentOrder == _LesserElement: W('\n')
- handlers[Node.COMMENT_NODE] = _do_comment
-
-
- def _do_attr(self, n, value):
- ''''_do_attr(self, node) -> None
- Process an attribute.'''
-
- W = self.write
- W(' ')
- W(n)
- W('="')
- s = value.replace(value, "&", "&amp;")
- s = s.replace("<", "&lt;")
- s = s.replace('"', '&quot;')
- s = s.replace('\011', '&#x9')
- s = s.replace('\012', '&#xA')
- s = s.replace('\015', '&#xD')
- W(s)
- W('"')
-
-
- def _do_element(self, node, initial_other_attrs = []):
- '''_do_element(self, node, initial_other_attrs = []) -> None
- Process an element (and its children).'''
-
- # Get state (from the stack) make local copies.
- # ns_parent -- NS declarations in parent
- # ns_rendered -- NS nodes rendered by ancestors
- # ns_local -- NS declarations relevant to this element
- # xml_attrs -- Attributes in XML namespace from parent
- # xml_attrs_local -- Local attributes in XML namespace.
- ns_parent, ns_rendered, xml_attrs = \
- self.state[0], self.state[1].copy(), self.state[2].copy() #0422
- ns_local = ns_parent.copy()
- xml_attrs_local = {}
-
- # progress("_do_element node.nodeName=", node.nodeName)
- # progress("_do_element node.namespaceURI", node.namespaceURI)
- # progress("_do_element node.tocml()", node.toxml())
- # Divide attributes into NS, XML, and others.
- other_attrs = initial_other_attrs[:]
- in_subset = _in_subset(self.subset, node)
- for a in _attrs(node):
- # progress("\t_do_element a.nodeName=", a.nodeName)
- if a.namespaceURI == XMLNS.BASE:
- n = a.nodeName
- if n == "xmlns:": n = "xmlns" # DOM bug workaround
- ns_local[n] = a.nodeValue
- elif a.namespaceURI == XMLNS.XML:
- if _inclusive(self) or in_subset:
- xml_attrs_local[a.nodeName] = a #0426
- else:
- other_attrs.append(a)
- #add local xml:foo attributes to ancestor's xml:foo attributes
- xml_attrs.update(xml_attrs_local)
-
- # Render the node
- W, name = self.write, None
- if in_subset:
- name = node.nodeName
- W('<')
- W(name)
-
- # Create list of NS attributes to render.
- ns_to_render = []
- for n,v in ns_local.items():
-
- # If default namespace is XMLNS.BASE or empty,
- # and if an ancestor was the same
- if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
- and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
- continue
-
- # "omit namespace node with local name xml, which defines
- # the xml prefix, if its string value is
- # http://www.w3.org/XML/1998/namespace."
- if n in ["xmlns:xml", "xml"] \
- and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
- continue
-
-
- # If not previously rendered
- # and it's inclusive or utilized
- if (n,v) not in ns_rendered.items() \
- and (_inclusive(self) or \
- _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
- ns_to_render.append((n, v))
-
- # Sort and render the ns, marking what was rendered.
- ns_to_render.sort(_sorter_ns)
- for n,v in ns_to_render:
- self._do_attr(n, v)
- ns_rendered[n]=v #0417
-
- # If exclusive or the parent is in the subset, add the local xml attributes
- # Else, add all local and ancestor xml attributes
- # Sort and render the attributes.
- if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426
- other_attrs.extend(xml_attrs_local.values())
- else:
- other_attrs.extend(xml_attrs.values())
- other_attrs.sort(_sorter)
- for a in other_attrs:
- self._do_attr(a.nodeName, a.value)
- W('>')
-
- # Push state, recurse, pop state.
- state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
- for c in _children(node):
- _implementation.handlers[c.nodeType](self, c)
- self.state = state
-
- if name: W('</%s>' % name)
- handlers[Node.ELEMENT_NODE] = _do_element
-
-
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> UTF-8

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict -- a dictionary of prefix:uri namespace entries
                  assumed to exist in the surrounding context
        comments -- keep comments if non-zero (default is 0)
        subset -- Canonical XML subsetting resulting from XPath (default is [])
        unsuppressedPrefixes -- do exclusive C14N, and this specifies the
                  prefixes that should be inherited.
    '''
    # _implementation writes everything during construction;
    # apply() (removed in Python 3) is replaced by a direct call.
    if output:
        _implementation(node, output.write, **kw)
    else:
        s = StringIO.StringIO()
        _implementation(node, s.write, **kw)
        return s.getvalue()
-
-# end of xmlC14n.py
-
# from why import BecauseOfData, becauseSubexpression
def BecauseOfData(*args, **kargs):
    """No-op stand-in for the cwm provenance hook; returns None."""
    return None

def becauseSubexpression(*args, **kargs):
    """No-op stand-in for the cwm provenance hook; returns None."""
    return None
-
# Quantifier symbols (forSomeSym / forAllSym are defined earlier in the file).
N3_forSome_URI = forSomeSym
N3_forAll_URI = forAllSym

# Magic resources we know about



ADDED_HASH = "#"  # Stop where we use this in case we want to remove it!
# This is the hash on namespace URIs

# (kind, URI) pairs used as pre-built terms by the parser.
RDF_type = ( SYMBOL , RDF_type_URI )
DAML_sameAs = ( SYMBOL, DAML_sameAs_URI )

LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"

# XSD datatype URIs attached to typed literals produced by the parser.
BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
DECIMAL_DATATYPE = _XSD_PFX + "decimal"
DOUBLE_DATATYPE = _XSD_PFX + "double"
FLOAT_DATATYPE = _XSD_PFX + "float"
INTEGER_DATATYPE = _XSD_PFX + "integer"

option_noregen = 0  # If set, do not regenerate genids on output

# @@ I18n - the notname chars need extending for well known unicode non-text
# characters. The XML spec switched to assuming unknown things were name
# characaters.
# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~"  # else valid qname :-/
_notNameChars = _notQNameChars + ":"  # Assume anything else valid name :-/
_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'


N3CommentCharacter = "#"  # For unix script #! compatabilty
-
########################################## Parse string to sink
#
# Regular expressions used by the tokenizer (matched with .match(str, pos)):
eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n')  # end of line, poss. w/comment
eof = re.compile(r'[ \t]*(#[^\n]*)?$')      # end of file, poss. w/comment
ws = re.compile(r'[ \t]*')                  # Whitespace not including NL
signed_integer = re.compile(r'[-+]?[0-9]+') # integer
number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?')
digitstring = re.compile(r'[0-9]+')         # Unsigned integer
interesting = re.compile(r'[\\\r\n\"]')     # chars needing escape handling in strings
langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')  # literal @lang tag
#"
-
-
-
-class SinkParser:
    def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
                 genPrefix = "", flags="",
                 why=None):
        """ note: namespace names should *not* end in #;
        the # will get added during qname processing """

        self._bindings = {}  # prefix -> namespace URI bindings seen so far
        self._flags = flags  # single-letter behaviour flags (e.g. "B")
        if thisDoc != "":
            assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
            self._bindings[""] = thisDoc + "#"  # default

        self._store = store
        if genPrefix: store.setGenPrefix(genPrefix)  # pass it on

        self._thisDoc = thisDoc
        self.lines = 0  # for error handling
        self.startOfLine = 0  # For calculating character number
        self._genPrefix = genPrefix
        self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false']
        self.keywordsSet = 0  # Then only can others be considerd qnames
        self._anonymousNodes = {}  # Dict of anon nodes already declared ln: Term
        self._variables = {}
        self._parentVariables = {}
        self._reason = why  # Why the parser was asked to parse this

        self._reason2 = None  # Why these triples
        # was: diag.tracking
        if tracking: self._reason2 = BecauseOfData(
            store.newSymbol(thisDoc), because=self._reason)

        # Base URI for relative-URI resolution: explicit baseURI wins,
        # else the document URI, else none.
        if baseURI: self._baseURI = baseURI
        else:
            if thisDoc:
                self._baseURI = thisDoc
            else:
                self._baseURI = None

        assert not self._baseURI or ':' in self._baseURI

        # Prefix for generated bnode URIs (see here()).
        if not self._genPrefix:
            if self._thisDoc: self._genPrefix = self._thisDoc + "#_g"
            else: self._genPrefix = uniqueURI()

        if openFormula == None:
            if self._thisDoc:
                self._formula = store.newFormula(thisDoc + "#_formula")
            else:
                self._formula = store.newFormula()
        else:
            self._formula = openFormula

        self._context = self._formula
        self._parentContext = None
-
-
    def here(self, i):
        """String generated from position in file

        This is for repeatability when refering people to bnodes in a document.
        This has diagnostic uses less formally, as it should point one to which
        bnode the arbitrary identifier actually is. It gives the
        line and character number of the '[' charcacter or path character
        which introduced the blank node. The first blank node is boringly _L1C1.
        It used to be used only for tracking, but for tests in general
        it makes the canonical ordering of bnodes repeatable."""

        # Character number is 1-based within the current line.
        return "%s_L%iC%i" % (self._genPrefix, self.lines,
                              i - self.startOfLine + 1)
-
    def formula(self):
        # The open top-level formula this parser is loading into.
        return self._formula
-
- def loadStream(self, stream):
- return self.loadBuf(stream.read()) # Not ideal
-
    def loadBuf(self, buf):
        """Parses a buffer and returns its top level formula"""
        self.startDoc()

        self.feed(buf)
        return self.endDoc()  # self._formula
-
-
    def feed(self, octets):
        """Feed an octet stream tothe parser

        if BadSyntax is raised, the string
        passed in the exception object is the
        remainder after any statements have been parsed.
        So if there is more data to feed to the
        parser, it should be straightforward to recover."""

        # Accept raw UTF-8 octets or an already-decoded unicode string
        # (Python 2: `unicode` type, backquote repr, print statement below).
        if not isinstance(octets, unicode):
            s = octets.decode('utf-8')
            # NB already decoded, so \ufeff
            if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
                s = s[1:]  # strip a leading byte-order mark
        else:
            s = octets

        # Parse one directive-or-statement at a time until skipSpace
        # signals EOF with a negative index.
        i = 0
        while i >= 0:
            j = self.skipSpace(s, i)
            if j < 0: return

            i = self.directiveOrStatement(s, j)
            if i < 0:
                print "# next char: ", `s[j]`
                raise BadSyntax(self._thisDoc, self.lines, s, j,
                                "expected directive or statement")
-
    def directiveOrStatement(self, str, h):
        """Parse one directive or statement starting at h.
        Returns the index just past its terminating '.', or a negative
        value on EOF / no match."""

        i = self.skipSpace(str, h)
        if i < 0: return i  # EOF

        # Try a directive first; fall back to a statement.
        j = self.directive(str, i)
        if j >= 0: return self.checkDot(str, j)

        j = self.statement(str, i)
        if j >= 0: return self.checkDot(str, j)

        return j
-
-
    #@@I18N
    global _notNameChars
    #_namechars = string.lowercase + string.uppercase + string.digits + '_-'

    def tok(self, tok, str, i):
        """Check for keyword. Space must have been stripped on entry and
        we must not be at end of file."""

        assert tok[0] not in _notNameChars  # not for punctuation
        # "@word" is always treated as a keyword; a bare word only counts
        # when it appears in the currently declared keyword list.
        if str[i:i+1] == "@":
            i = i+1
        else:
            if tok not in self.keywords:
                return -1   # No, this has neither keywords declaration nor "@"

        # Match the word and require a non-qname character right after it.
        # (Relies on there being at least one character after the token.)
        if (str[i:i+len(tok)] == tok
                and (str[i+len(tok)] in _notQNameChars)):
            i = i + len(tok)
            return i
        else:
            return -1
-
    def directive(self, str, i):
        """Try to parse an @-directive (@prefix, @base, @keywords, @forAll,
        @forSome) at position i. Returns the new position, or -1 when this
        is not a directive."""
        j = self.skipSpace(str, i)
        if j < 0: return j  # eof
        res = []

        j = self.tok('bind', str, i)  # implied "#". Obsolete.
        if j > 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                  "keyword bind is obsolete: use @prefix")

        j = self.tok('keywords', str, i)
        if j > 0:
            # @keywords w1, w2 -- declare which bare words act as keywords.
            i = self.commaSeparatedList(str, j, res, self.bareWord)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "'@keywords' needs comma separated list of words")
            self.setKeywords(res[:])
            # was: diag.chatty_flag
            if chatty_flag > 80: progress("Keywords ", self.keywords)
            return i

        j = self.tok('forAll', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                      "Bad variable list after @forAll")
            for x in res:
                #self._context.declareUniversal(x)
                if x not in self._variables or x in self._parentVariables:
                    self._variables[x] = self._context.newUniversal(x)
            return i

        j = self.tok('forSome', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                      "Bad variable list after @forSome")
            for x in res:
                self._context.declareExistential(x)
            return i

        j = self.tok('prefix', str, i)  # no implied "#"
        if j >= 0:
            t = []
            i = self.qname(str, j, t)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "expected qname after @prefix")
            j = self.uri_ref2(str, i, t)
            if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                      "expected <uriref> after @prefix _qname_")
            ns = self.uriOf(t[1])

            # Relative namespace URIs are resolved against the base URI.
            if self._baseURI:
                ns = join(self._baseURI, ns)
            elif ":" not in ns:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "With no base URI, cannot use relative URI in @prefix <"+ns+">")
            assert ':' in ns  # must be absolute
            self._bindings[t[0][0]] = ns
            self.bind(t[0][0], hexify(ns))
            return j

        j = self.tok('base', str, i)  # Added 2007/7/7
        if j >= 0:
            t = []
            i = self.uri_ref2(str, j, t)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "expected <uri> after @base ")
            ns = self.uriOf(t[0])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            else:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "With no previous base URI, cannot use relative URI in @base <"+ns+">")
            assert ':' in ns  # must be absolute
            self._baseURI = ns
            return i

        return -1  # Not a directive, could be something else.
-
    def bind(self, qn, uri):
        """Register prefix qn -> uri with the store ("" = default namespace).
        uri must already be a %x-encoded byte string (Python 2 str)."""
        assert isinstance(uri,
                          types.StringType), "Any unicode must be %x-encoded already"
        if qn == "":
            self._store.setDefaultNamespace(uri)
        else:
            self._store.bind(qn, uri)
-
    def setKeywords(self, k):
        "Takes a list of strings"
        if k == None:
            # Forget any @keywords declaration; defaults apply again.
            self.keywordsSet = 0
        else:
            self.keywords = k
            self.keywordsSet = 1
-
-
    def startDoc(self):
        # Notify the store that loading of self._formula begins.
        # was: self._store.startDoc()
        self._store.startDoc(self._formula)
-
    def endDoc(self):
        """Signal end of document and stop parsing. returns formula"""
        self._store.endDoc(self._formula)  # don't canonicalize yet
        return self._formula
-
    def makeStatement(self, quadruple):
        """Hand one (context, predicate, subject, object) quad to the store."""
        #$$$$$$$$$$$$$$$$$$$$$
        # print "# Parser output: ", `quadruple`
        self._store.makeStatement(quadruple, why=self._reason2)
-
-
-
    def statement(self, str, i):
        """Parse one statement: subject followed by a property list.
        Returns the new position, or negative on failure."""
        r = []

        i = self.object(str, i, r)  # Allow literal for subject - extends RDF
        if i < 0: return i

        j = self.property_list(str, i, r[0])

        if j < 0: raise BadSyntax(self._thisDoc, self.lines,
                                  str, i, "expected propertylist")
        return j
-
    def subject(self, str, i, res):
        # A subject is just an item (which is a path).
        return self.item(str, i, res)
-
    def verb(self, str, i, res):
        """ has _prop_
        is _prop_ of
        a
        =
        _prop_
        >- prop ->
        <- prop -<
        _operator_

        Appends ('->', term) or ('<-', term) to res: the direction marker
        says whether the parsed property runs forward or reversed."""

        j = self.skipSpace(str, i)
        if j < 0: return j  # eof

        r = []

        j = self.tok('has', str, i)
        if j >= 0:
            i = self.prop(str, j, r)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines,
                                      str, j, "expected property after 'has'")
            res.append(('->', r[0]))  # forward arc
            return i

        j = self.tok('is', str, i)
        if j >= 0:
            i = self.prop(str, j, r)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "expected <property> after 'is'")
            j = self.skipSpace(str, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "End of file found, expected property after 'is'")
                return j  # eof  (NOTE(review): unreachable after the raise)
            i = j
            j = self.tok('of', str, i)
            if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                      "expected 'of' after 'is' <prop>")
            res.append(('<-', r[0]))  # reversed arc
            return j

        j = self.tok('a', str, i)
        if j >= 0:
            res.append(('->', RDF_type))
            return j

        if str[i:i+2] == "<=":
            res.append(('<-', self._store.newSymbol(Logic_NS+"implies")))
            return i+2

        if str[i:i+1] == "=":
            if str[i+1:i+2] == ">":
                res.append(('->', self._store.newSymbol(Logic_NS+"implies")))
                return i+2
            res.append(('->', DAML_sameAs))
            return i+1

        if str[i:i+2] == ":=":
            # patch file relates two formulae, uses this @@ really?
            res.append(('->', Logic_NS+"becomes"))
            return i+2

        j = self.prop(str, i, r)
        if j >= 0:
            res.append(('->', r[0]))
            return j

        if str[i:i+2] == ">-" or str[i:i+2] == "<-":
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            ">- ... -> syntax is obsolete.")

        return -1
-
    def prop(self, str, i, res):
        # A property is just an item (which is a path).
        return self.item(str, i, res)
-
    def item(self, str, i, res):
        # An item is a path expression.
        return self.path(str, i, res)
-
- def blankNode(self, uri=None):
- if "B" not in self._flags:
- return self._context.newBlankNode(uri, why=self._reason2)
- x = self._context.newSymbol(uri)
- self._context.declareExistential(x)
- return x
-
    def path(self, str, i, res):
        """Parse the path production.
        """
        j = self.nodeOrLiteral(str, i, res)
        if j < 0: return j  # nope

        # Each "!", "^" (or tight ".") step mints a fresh bnode and a
        # statement linking it to the node parsed so far.
        while str[j:j+1] in "!^.":  # no spaces, must follow exactly (?)
            ch = str[j:j+1]  # @@ Allow "." followed IMMEDIATELY by a node.
            if ch == ".":
                ahead = str[j+1:j+2]
                if not ahead or (ahead in _notNameChars
                                 and ahead not in ":?<[{("): break
            subj = res.pop()
            obj = self.blankNode(uri=self.here(j))
            j = self.node(str, j+1, res)
            if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "EOF found in middle of path syntax")
            pred = res.pop()
            if ch == "^":  # Reverse traverse
                self.makeStatement((self._context, pred, obj, subj))
            else:
                self.makeStatement((self._context, pred, subj, obj))
            res.append(obj)
        return j
-
- def anonymousNode(self, ln):
- """Remember or generate a term for one of these _: anonymous nodes"""
- term = self._anonymousNodes.get(ln, None)
- if term != None: return term
- term = self._store.newBlankNode(self._context, why=self._reason2)
- self._anonymousNodes[ln] = term
- return term
-
    def node(self, str, i, res, subjectAlready=None):
        """Parse the <node> production.
        Space is now skipped once at the beginning
        instead of in multipe calls to self.skipSpace().
        """
        subj = subjectAlready

        j = self.skipSpace(str, i)
        if j < 0: return j  # eof
        i = j
        ch = str[i:i+1]  # Quick 1-character checks first:

        if ch == "[":
            # [ ... ] -- blank node with property list, or "[= obj]" binding.
            bnodeID = self.here(i)
            j = self.skipSpace(str, i+1)
            if j < 0: raise BadSyntax(self._thisDoc,
                                      self.lines, str, i, "EOF after '['")
            if str[j:j+1] == "=":  # Hack for "is" binding name to anon node
                i = j+1
                objs = []
                j = self.objectList(str, i, objs);
                if j >= 0:
                    subj = objs[0]
                    # More than one object: equate them all with sameAs.
                    if len(objs) > 1:
                        for obj in objs:
                            self.makeStatement((self._context,
                                                DAML_sameAs, subj, obj))
                    j = self.skipSpace(str, j)
                    if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                              "EOF when objectList expected after [ = ")
                    if str[j:j+1] == ";":
                        j = j+1
                else:
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "objectList expected after [= ")

            if subj is None:
                subj = self.blankNode(uri= bnodeID)

            i = self.property_list(str, j, subj)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "property_list expected")

            j = self.skipSpace(str, i)
            if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                      "EOF when ']' expected after [ <propertyList>")
            if str[j:j+1] != "]":
                raise BadSyntax(self._thisDoc,
                                self.lines, str, j, "']' expected")
            res.append(subj)
            return j+1

        if ch == "{":
            ch2 = str[i+1:i+2]
            if ch2 == '$':
                # {$ e1, e2, ... $} -- explicit set syntax.
                i += 1
                j = i + 1
                List = []
                first_run = True
                while 1:
                    i = self.skipSpace(str, j)
                    if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                              "needed '$}', found end.")
                    if str[i:i+2] == '$}':
                        j = i+2
                        break

                    if not first_run:
                        if str[i:i+1] == ',':
                            i += 1
                        else:
                            raise BadSyntax(self._thisDoc, self.lines,
                                            str, i, "expected: ','")
                    else: first_run = False

                    item = []
                    j = self.item(str, i, item)  #@@@@@ should be path, was object
                    if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                              "expected item in set or '$}'")
                    List.append(self._store.intern(item[0]))
                res.append(self._store.newSet(List, self._context))
                return j
            else:
                # { ... } -- nested formula: push parser state, parse the
                # contents into a new formula, then restore everything.
                j = i+1
                oldParentContext = self._parentContext
                self._parentContext = self._context
                parentAnonymousNodes = self._anonymousNodes
                grandParentVariables = self._parentVariables
                self._parentVariables = self._variables
                self._anonymousNodes = {}
                self._variables = self._variables.copy()
                reason2 = self._reason2
                self._reason2 = becauseSubexpression
                if subj is None: subj = self._store.newFormula()
                self._context = subj

                while 1:
                    i = self.skipSpace(str, j)
                    if i < 0: raise BadSyntax(self._thisDoc, self.lines,
                                              str, i, "needed '}', found end.")

                    if str[i:i+1] == "}":
                        j = i+1
                        break

                    j = self.directiveOrStatement(str, i)
                    if j < 0: raise BadSyntax(self._thisDoc, self.lines,
                                              str, i, "expected statement or '}'")

                # Pop the saved parser state.
                self._anonymousNodes = parentAnonymousNodes
                self._variables = self._parentVariables
                self._parentVariables = grandParentVariables
                self._context = self._parentContext
                self._reason2 = reason2
                self._parentContext = oldParentContext
                res.append(subj.close())  # No use until closed
                return j

        if ch == "(":
            # ( ... ) -- list; ($ ... ) -- set.
            thing_type = self._store.newList
            ch2 = str[i+1:i+2]
            if ch2 == '$':
                thing_type = self._store.newSet
                i += 1
            j = i+1

            List = []
            while 1:
                i = self.skipSpace(str, j)
                if i < 0: raise BadSyntax(self._thisDoc, self.lines,
                                          str, i, "needed ')', found end.")
                if str[i:i+1] == ')':
                    j = i+1
                    break

                item = []
                j = self.item(str, i, item)  #@@@@@ should be path, was object
                if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                          "expected item in list or ')'")
                List.append(self._store.intern(item[0]))
            res.append(thing_type(List, self._context))
            return j

        j = self.tok('this', str, i)  # This context
        if j >= 0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
            # NOTE(review): unreachable after the raise above.
            res.append(self._context)
            return j

        #booleans
        j = self.tok('true', str, i)
        if j >= 0:
            res.append(True)
            return j
        j = self.tok('false', str, i)
        if j >= 0:
            res.append(False)
            return j

        if subj is None:  # If this can be a named node, then check for a name.
            j = self.uri_ref2(str, i, res)
            if j >= 0:
                return j

        return -1
-
    def property_list(self, str, i, subj):
        """Parse property list
        Leaves the terminating punctuation in the buffer
        """
        while 1:
            j = self.skipSpace(str, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "EOF found when expected verb in property list")
                return j  # eof  (NOTE(review): unreachable after the raise)

            if str[j:j+2] == ":-":
                # subj :- node  -- attach an equivalent structure to subj.
                i = j + 2
                res = []
                j = self.node(str, i, res, subj)
                if j < 0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                          "bad {} or () or [] node after :- ")
                i = j
                continue
            i = j
            v = []
            j = self.verb(str, i, v)
            if j <= 0:
                return i  # void but valid

            objs = []
            i = self.objectList(str, j, objs)
            if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                      "objectList expected")
            # Emit one statement per object, honouring the verb's direction.
            for obj in objs:
                dir, sym = v[0]
                if dir == '->':
                    self.makeStatement((self._context, sym, subj, obj))
                else:
                    self.makeStatement((self._context, sym, obj, subj))

            j = self.skipSpace(str, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "EOF found in list of objects")
                return j  # eof  (NOTE(review): unreachable after the raise)
            if str[i:i+1] != ";":
                return i
            i = i+1  # skip semicolon and continue
-
    def commaSeparatedList(self, str, j, res, what):
        """return value: -1 bad syntax; >1 new position in str
        res has things found appended

        `what` is the bound parse method used for each element.
        """
        i = self.skipSpace(str, j)
        if i < 0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "EOF found expecting comma sep list")
            return i  # (NOTE(review): unreachable after the raise)
        if str[i] == ".": return j  # empty list is OK
        i = what(str, i, res)
        if i < 0: return -1

        while 1:
            j = self.skipSpace(str, i)
            if j < 0: return j  # eof
            ch = str[j:j+1]
            if ch != ",":
                if ch != ".":
                    return -1
                return j  # Found but not swallowed "."
            i = what(str, j+1, res)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "bad list content")
                return i  # (NOTE(review): unreachable after the raise)
-
    def objectList(self, str, i, res):
        """Parse one or more comma-separated objects, appending to res.
        Returns the position of the first non-object token, or negative."""
        i = self.object(str, i, res)
        if i < 0: return -1
        while 1:
            j = self.skipSpace(str, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "EOF found after object")
                return j  # eof  (NOTE(review): unreachable after the raise)
            if str[j:j+1] != ",":
                return j  # Found something else!
            i = self.object(str, j+1, res)
            if i < 0: return i
-
    def checkDot(self, str, i):
        """Consume a statement terminator: '.' is swallowed; a closing
        '}' or ']' is accepted but left in the buffer."""
        j = self.skipSpace(str, i)
        if j < 0: return j  # eof
        if str[j:j+1] == ".":
            return j+1  # skip
        if str[j:j+1] == "}":
            return j  # don't skip it
        if str[j:j+1] == "]":
            return j
        raise BadSyntax(self._thisDoc, self.lines,
                        str, j, "expected '.' or '}' or ']' at end of statement")
        return i  # (NOTE(review): unreachable after the raise)
-
-
    def uri_ref2(self, str, i, res):
        """Generate uri from n3 representation.

        Note that the RDF convention of directly concatenating
        NS and local name is now used though I prefer inserting a '#'
        to make the namesapces look more like what XML folks expect.

        Handles, in order: qnames, ?variables, <uriref>, and (when
        @keywords is active) bare words.
        """
        qn = []
        j = self.qname(str, i, qn)
        if j >= 0:
            pfx, ln = qn[0]
            if pfx is None:
                assert 0, "not used?"
                ns = self._baseURI + ADDED_HASH
            else:
                try:
                    ns = self._bindings[pfx]
                except KeyError:
                    if pfx == "_":  # Magic prefix 2001/05/30, can be overridden
                        res.append(self.anonymousNode(ln))
                        return j
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "Prefix \"%s:\" not bound" % (pfx))
            symb = self._store.newSymbol(ns + ln)
            if symb in self._variables:
                res.append(self._variables[symb])
            else:
                res.append(symb)  # @@@ "#" CONVENTION
            if not ns.find("#"): progress(
                # NOTE(review): this is true only when "#" is at index 0;
                # an absent "#" gives find() == -1 -- probably meant
                # ns.find("#") < 0. Confirm before changing.
                "Warning: no # on namespace %s," % ns)
            return j

        i = self.skipSpace(str, i)
        if i < 0: return -1

        if str[i] == "?":
            v = []
            j = self.variable(str, i, v)
            if j > 0:  #Forget varibles as a class, only in context.
                res.append(v[0])
                return j
            return -1

        elif str[i] == "<":
            # <uriref> -- scan to the closing ">".
            i = i + 1
            st = i
            while i < len(str):
                if str[i] == ">":
                    uref = str[st:i]  # the join should dealt with "":
                    if self._baseURI:
                        uref = join(self._baseURI, uref)  # was: uripath.join
                    else:
                        assert ":" in uref, \
                            "With no base URI, cannot deal with relative URIs"
                    if str[i-1:i] == "#" and not uref[-1:] == "#":
                        uref = uref + "#"  # She meant it! Weirdness in urlparse?
                    symb = self._store.newSymbol(uref)
                    if symb in self._variables:
                        res.append(self._variables[symb])
                    else:
                        res.append(symb)
                    return i+1
                i = i + 1
            # NOTE(review): 'j' here is the stale result of the failed qname
            # attempt above, not the current scan position -- confirm.
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "unterminated URI reference")

        elif self.keywordsSet:
            v = []
            j = self.bareWord(str, i, v)
            if j < 0: return -1  #Forget varibles as a class, only in context.
            if v[0] in self.keywords:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                'Keyword "%s" not allowed here.' % v[0])
            res.append(self._store.newSymbol(self._bindings[""]+v[0]))
            return j
        else:
            return -1
-
    def skipSpace(self, str, i):
        """Skip white space, newlines and comments.
        return -1 if EOF, else position of first non-ws character"""
        # Consume whole lines (with optional trailing comment) while
        # keeping the line counter and line-start offset up to date.
        while 1:
            m = eol.match(str, i)
            if m == None: break
            self.lines = self.lines + 1
            i = m.end()  # Point to first character unmatched
            self.startOfLine = i
        m = ws.match(str, i)
        if m != None:
            i = m.end()
        m = eof.match(str, i)
        if m != None: return -1
        return i
-
    def variable(self, str, i, res):
        """ ?abc -> variable(:abc)

        Universally quantified in the parent context (or, at the outermost
        level, in the current context)."""

        j = self.skipSpace(str, i)
        if j < 0: return -1

        if str[j:j+1] != "?": return -1
        j = j+1
        i = j
        if str[j] in "0123456789-":
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "Varible name can't start with '%s'" % str[j])
            return -1  # (NOTE(review): unreachable after the raise)
        while i < len(str) and str[i] not in _notNameChars:
            i = i+1
        if self._parentContext == None:
            # Outermost level: quantify in the current context instead.
            varURI = self._store.newSymbol(self._baseURI + "#" + str[j:i])
            if varURI not in self._variables:
                self._variables[varURI] = self._context.newUniversal(varURI
                    , why=self._reason2)
            res.append(self._variables[varURI])
            return i
        # @@ was:
        # raise BadSyntax(self._thisDoc, self.lines, str, j,
        #     "Can't use ?xxx syntax for variable in outermost level: %s"
        #     % str[j-1:i])
        varURI = self._store.newSymbol(self._baseURI + "#" + str[j:i])
        if varURI not in self._parentVariables:
            self._parentVariables[varURI] = self._parentContext.newUniversal(varURI
                , why=self._reason2)
        res.append(self._parentVariables[varURI])
        return i
-
    def bareWord(self, str, i, res):
        """ abc -> :abc
        """
        j = self.skipSpace(str, i)
        if j < 0: return -1

        # A bare word may not start with a digit, '-' or punctuation.
        if str[j] in "0123456789-" or str[j] in _notNameChars: return -1
        i = j
        while i < len(str) and str[i] not in _notNameChars:
            i = i+1
        res.append(str[j:i])
        return i
-
    def qname(self, str, i, res):
        """
        xyz:def -> ('xyz', 'def')
        If not in keywords and keywordsSet: def -> ('', 'def')
        :def -> ('', 'def')
        """

        i = self.skipSpace(str, i)
        if i < 0: return -1

        c = str[i]
        if c in "0123456789-+": return -1  # looks like a number, not a qname
        if c not in _notNameChars:
            # Accumulate the (possible) prefix part.
            ln = c
            i = i + 1
            while i < len(str):
                c = str[i]
                if c not in _notNameChars:
                    ln = ln + c
                    i = i + 1
                else: break
        else:  # First character is non-alpha
            ln = ''  # Was: None - TBL (why? useful?)

        if i < len(str) and str[i] == ':':
            pfx = ln
            i = i + 1
            ln = ''
            # Accumulate the local name after the colon.
            while i < len(str):
                c = str[i]
                if c not in _notNameChars:
                    ln = ln + c
                    i = i + 1
                else: break

            res.append((pfx, ln))
            return i

        else:  # delimiter was not ":"
            # A bare word counts as (''
            # , word) only when @keywords is active and it's not a keyword.
            if ln and self.keywordsSet and ln not in self.keywords:
                res.append(('', ln))
                return i
            return -1
-
- def object(self, str, i, res):
- j = self.subject(str, i, res)
- if j>= 0:
- return j
- else:
- j = self.skipSpace(str, i)
- if j<0: return -1
- else: i=j
-
- if str[i]=='"':
- if str[i:i+3] == '"""': delim = '"""'
- else: delim = '"'
- i = i + len(delim)
-
- j, s = self.strconst(str, i, delim)
-
- res.append(self._store.newLiteral(s))
- progress("New string const ", s, j)
- return j
- else:
- return -1
-
- def nodeOrLiteral(self, str, i, res):
- j = self.node(str, i, res)
- startline = self.lines # Remember where for error messages
- if j>= 0:
- return j
- else:
- j = self.skipSpace(str, i)
- if j<0: return -1
- else: i=j
-
- ch = str[i]
- if ch in "-+0987654321":
- m = number_syntax.match(str, i)
- if m == None:
- raise BadSyntax(self._thisDoc, self.lines, str, i,
- "Bad number syntax")
- j = m.end()
- if m.group('exponent') != None: # includes decimal exponent
- res.append(float(str[i:j]))
-# res.append(self._store.newLiteral(str[i:j],
-# self._store.newSymbol(FLOAT_DATATYPE)))
- elif m.group('decimal') != None:
- res.append(Decimal(str[i:j]))
- else:
- res.append(long(str[i:j]))
-# res.append(self._store.newLiteral(str[i:j],
-# self._store.newSymbol(INTEGER_DATATYPE)))
- return j
-
- if str[i]=='"':
- if str[i:i+3] == '"""': delim = '"""'
- else: delim = '"'
- i = i + len(delim)
-
- dt = None
- j, s = self.strconst(str, i, delim)
- lang = None
- if str[j:j+1] == "@": # Language?
- m = langcode.match(str, j+1)
- if m == None:
- raise BadSyntax(self._thisDoc, startline, str, i,
- "Bad language code syntax on string literal, after @")
- i = m.end()
- lang = str[j+1:i]
- j = i
- if str[j:j+2] == "^^":
- res2 = []
- j = self.uri_ref2(str, j+2, res2) # Read datatype URI
- dt = res2[0]
-# if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
- if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
- try:
- dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">'
- + s
- + '</rdf:envelope>').firstChild
- except:
- raise ValueError('s="%s"' % s)
- res.append(self._store.newXMLLiteral(dom))
- return j
- res.append(self._store.newLiteral(s, dt, lang))
- return j
- else:
- return -1
-
- def uriOf(self, sym):
- if isinstance(sym, types.TupleType):
- return sym[1] # old system for --pipe
- # return sym.uriref() # cwm api
- return sym
-
-
- def strconst(self, str, i, delim):
- """parse an N3 string constant delimited by delim.
- return index, val
- """
-
-
- j = i
- ustr = u"" # Empty unicode string
- startline = self.lines # Remember where for error messages
- while j<len(str):
- if str[j] == '"':
- if delim == '"': # done when delim is "
- i = j + 1
- return i, ustr
- if delim == '"""': # done when delim is """ and ...
- if str[j:j+5] == '"""""': # ... we have "" before
- i = j + 5
- ustr = ustr + '""'
- return i, ustr
- if str[j:j+4] == '""""': # ... we have " before
- i = j + 4
- ustr = ustr + '"'
- return i, ustr
- if str[j:j+3] == '"""': # ... current " is part of delim
- i = j + 3
- return i, ustr
-
- # we are inside of the string and current char is "
- j = j + 1
- ustr = ustr + '"'
- continue
-
- m = interesting.search(str, j) # was str[j:].
- # Note for pos param to work, MUST be compiled ... re bug?
- assert m , "Quote expected in string at ^ in %s^%s" %(
- str[j-20:j], str[j:j+20]) # we at least have to find a quote
-
- i = m.start()
- try:
- ustr = ustr + str[j:i]
- except UnicodeError:
- err = ""
- for c in str[j:i]:
- err = err + (" %02x" % ord(c))
- streason = sys.exc_info()[1].__str__()
- raise BadSyntax(self._thisDoc, startline, str, j,
- "Unicode error appending characters %s to string, because\n\t%s"
- % (err, streason))
-
-# print "@@@ i = ",i, " j=",j, "m.end=", m.end()
-
- ch = str[i]
- if ch == '"':
- j = i
- continue
- elif ch == "\r": # Strip carriage returns
- j = i+1
- continue
- elif ch == "\n":
- if delim == '"':
- raise BadSyntax(self._thisDoc, startline, str, i,
- "newline found in string literal")
- self.lines = self.lines + 1
- ustr = ustr + ch
- j = i + 1
- self.startOfLine = j
-
- elif ch == "\\":
- j = i + 1
- ch = str[j:j+1] # Will be empty if string ends
- if not ch:
- raise BadSyntax(self._thisDoc, startline, str, i,
- "unterminated string literal (2)")
- k = 'abfrtvn\\"'.find(ch)
- if k >= 0:
- uch = '\a\b\f\r\t\v\n\\"'[k]
- ustr = ustr + uch
- j = j + 1
- elif ch == "u":
- j, ch = self.uEscape(str, j+1, startline)
- ustr = ustr + ch
- elif ch == "U":
- j, ch = self.UEscape(str, j+1, startline)
- ustr = ustr + ch
- else:
- raise BadSyntax(self._thisDoc, self.lines, str, i,
- "bad escape")
-
- raise BadSyntax(self._thisDoc, self.lines, str, i,
- "unterminated string literal")
-
-
- def uEscape(self, str, i, startline):
- j = i
- count = 0
- value = 0
- while count < 4: # Get 4 more characters
- ch = str[j:j+1].lower()
- # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
- j = j + 1
- if ch == "":
- raise BadSyntax(self._thisDoc, startline, str, i,
- "unterminated string literal(3)")
- k = "0123456789abcdef".find(ch)
- if k < 0:
- raise BadSyntax(self._thisDoc, startline, str, i,
- "bad string literal hex escape")
- value = value * 16 + k
- count = count + 1
- uch = unichr(value)
- return j, uch
-
- def UEscape(self, str, i, startline):
- stringType = type('')
- j = i
- count = 0
- value = '\\U'
- while count < 8: # Get 8 more characters
- ch = str[j:j+1].lower()
- # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
- j = j + 1
- if ch == "":
- raise BadSyntax(self._thisDoc, startline, str, i,
- "unterminated string literal(3)")
- k = "0123456789abcdef".find(ch)
- if k < 0:
- raise BadSyntax(self._thisDoc, startline, str, i,
- "bad string literal hex escape")
- value = value + ch
- count = count + 1
-
- uch = stringType(value).decode('unicode-escape')
- return j, uch
-
-wide_build = True
-try:
- unichr(0x10000)
-except ValueError:
- wide_build = False
-
-# If we are going to do operators then they should generate
-# [ is operator:plus of ( \1 \2 ) ]
-
-
-class BadSyntax(SyntaxError):
- def __init__(self, uri, lines, str, i, why):
- self._str = str.encode('utf-8') # Better go back to strings for errors
- self._i = i
- self._why = why
- self.lines = lines
- self._uri = uri
-
- def __str__(self):
- str = self._str
- i = self._i
- st = 0
- if i>60:
- pre="..."
- st = i - 60
- else: pre=""
- if len(str)-i > 60: post="..."
- else: post=""
-
- return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
- % (self.lines +1, self._uri, self._why, pre,
- str[st:i], str[i:i+60], post)
-
-
-
-def stripCR(str):
- res = ""
- for ch in str:
- if ch != "\r":
- res = res + ch
- return res
-
-def dummyWrite(x):
- pass
-
-################################################################################
-
-
-def toBool(s):
- if s == 'true' or s == 'True' or s == '1':
- return True
- if s == 'false' or s == 'False' or s == '0':
- return False
- raise ValueError(s)
-
-
-
-
-
-class Formula(object):
- number = 0
-
- def __init__(self, parent):
- self.counter = 0
- Formula.number += 1
- self.number = Formula.number
- self.existentials = {}
- self.universals = {}
-
- self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id())
-
- def __str__(self):
- return '_:Formula%s' % self.number
-
- def id(self):
- return BNode('_:Formula%s' % self.number)
-
- def newBlankNode(self, uri=None, why=None):
- if uri is None:
- self.counter += 1
- b = BNode('f%sb%s' % (id(self), self.counter))
- else: b = BNode(uri.split('#').pop().replace('_', 'b'))
- return b
-
- def newUniversal(self, uri, why=None):
- return Variable(uri.split('#').pop())
-
- def declareExistential(self, x):
- self.existentials[x] = self.newBlankNode()
-
- def close(self):
-
- return self.quotedgraph
-
-r_hibyte = re.compile(r'([\x80-\xff])')
-def iri(uri):
- return uri.decode('utf-8')
- # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri))
-
-class RDFSink(object):
- def __init__(self, graph):
- self.rootFormula = None
- self.counter = 0
- self.graph=graph
-
-
- def newFormula(self):
- assert self.graph.store.formula_aware
- f = Formula(self.graph)
- return f
-
- def newSymbol(self, *args):
- uri = args[0].encode('utf-8')
- return URIRef(iri(uri))
-
- def newBlankNode(self, arg=None, **kargs):
- if isinstance(arg, Formula):
- return arg.newBlankNode()
- elif arg is None:
- self.counter += 1
- b = BNode('n' + str(self.counter))
- else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
- return b
-
- def newLiteral(self, s, dt, lang):
- if dt: return Literal(s, datatype=dt)
- else: return Literal(s, lang=lang)
-
- def newList(self, n, f):
- if not n:
- return self.newSymbol(
- 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
- )
-
- a = self.newBlankNode(f)
- first = self.newSymbol(
- 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
- )
- rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
- self.makeStatement((f, first, a, n[0]))
- self.makeStatement((f, rest, a, self.newList(n[1:], f)))
- return a
-
- def newSet(self, *args):
- return set(args)
-
- def setDefaultNamespace(self, *args):
- return ':'.join(repr(n) for n in args)
-
- def makeStatement(self, quadruple, why=None):
- f, p, s, o = quadruple
-
- if hasattr(p, 'formula'):
- raise Exception("Formula used as predicate")
-
- s = self.normalise(f, s)
- p = self.normalise(f, p)
- o = self.normalise(f, o)
-
-
- if f == self.rootFormula:
- # print s, p, o, '.'
- self.graph.add((s, p, o))
- else:
- f.quotedgraph.add((s,p,o))
-
-
- #return str(quadruple)
-
- def normalise(self, f, n):
- if isinstance(n, tuple):
- return URIRef(unicode(n[1]))
-
- # if isinstance(n, list):
- # rdflist, f = n
- # name = self.newBlankNode()
- # if f == self.rootFormula:
- # sublist = name
- # for i in xrange(0, len(rdflist) - 1):
- # print sublist, 'first', rdflist[i]
- # rest = self.newBlankNode()
- # print sublist, 'rest', rest
- # sublist = rest
- # print sublist, 'first', rdflist[-1]
- # print sublist, 'rest', 'nil'
- # return name
-
- if isinstance(n, bool):
- s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
- return s
-
- if isinstance(n, int) or isinstance(n, long):
- s = Literal(unicode(n), datatype=INTEGER_DATATYPE)
- return s
-
- if isinstance(n, Decimal):
- value = str(n.normalize())
- if value == '-0':
- value = '0'
- s = Literal(value, datatype=DECIMAL_DATATYPE )
- return s
-
- if isinstance(n, float):
- s = Literal(str(n), datatype=DOUBLE_DATATYPE )
- return s
-
- if f.existentials.has_key(n):
- return f.existentials[n]
-
- # if isinstance(n, Var):
- # if f.universals.has_key(n):
- # return f.universals[n]
- # f.universals[n] = f.newBlankNode()
- # return f.universals[n]
-
- return n
-
- def intern(self, something):
- return something
-
- def bind(self, pfx, uri):
- pass # print pfx, ':', uri
-
- def startDoc(self, formula):
- self.rootFormula = formula
-
- def endDoc(self, formula):
- pass
-
-
-###################################################
-#
-# Utilities
-#
-
-Escapes = {'a': '\a',
- 'b': '\b',
- 'f': '\f',
- 'r': '\r',
- 't': '\t',
- 'v': '\v',
- 'n': '\n',
- '\\': '\\',
- '"': '"'}
-
-forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]')
-forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]')
-#"
-def stringToN3(str, singleLine=0, flags=""):
- res = ''
- if (len(str) > 20 and
- str[-1] <> '"' and
- not singleLine and
- (str.find("\n") >=0
- or str.find('"') >=0)):
- delim= '"""'
- forbidden = forbidden1 # (allow tabs too now)
- else:
- delim = '"'
- forbidden = forbidden2
-
- i = 0
-
- while i < len(str):
- m = forbidden.search(str, i)
- if not m:
- break
-
- j = m.start()
- res = res + str[i:j]
- ch = m.group(0)
- if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #"
- res = res + ch
- else:
- k = '\a\b\f\r\t\v\n\\"'.find(ch)
- if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k]
- else:
- if 'e' in flags:
-# res = res + ('\\u%04x' % ord(ch))
- res = res + ('\\u%04X' % ord(ch))
- # http://www.w3.org/TR/rdf-testcases/#ntriples
- else:
- res = res + ch
- i = j + 1
-
- # The following code fixes things for really high range Unicode
- newstr = ""
- for ch in res + str[i:]:
- if ord(ch)>65535:
- newstr = newstr + ('\\U%08X' % ord(ch))
- # http://www.w3.org/TR/rdf-testcases/#ntriples
- else:
- newstr = newstr + ch
- #
-
- return delim + newstr + delim
-
-def backslashUify(ustr):
- """Use URL encoding to return an ASCII string corresponding
- to the given unicode"""
-# progress("String is "+`ustr`)
-# s1=ustr.encode('utf-8')
- s = ""
- for ch in ustr: # .encode('utf-8'):
- if ord(ch) > 65535:
- ch = "\\U%08X" % ord(ch)
- elif ord(ch) > 126:
- ch = "\\u%04X" % ord(ch)
- else:
- ch = "%c" % ord(ch)
- s = s + ch
- return b(s)
-
-@py3compat.format_doctest_out
-def hexify(ustr):
- """Use URL encoding to return an ASCII string
- corresponding to the given UTF8 string
-
- >>> hexify("http://example/a b")
- %(b)s'http://example/a%%20b'
-
- """ #"
-# progress("String is "+`ustr`)
-# s1=ustr.encode('utf-8')
- s = ""
- for ch in ustr: # .encode('utf-8'):
- if ord(ch) > 126 or ord(ch) < 33 :
- ch = "%%%02X" % ord(ch)
- else:
- ch = "%c" % ord(ch)
- s = s + ch
- return b(s)
-
-def dummy():
- res = ""
- if len(str) > 20 and (str.find("\n") >=0
- or str.find('"') >=0):
- delim= '"""'
- forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now)
- else:
- delim = '"'
- forbidden = "\\\"\a\b\f\r\v\t\n"
- for i in range(len(str)):
- ch = str[i]
- j = forbidden.find(ch)
- if ch == '"' and delim == '"""' \
- and i+1 < len(str) and str[i+1] != '"':
- j=-1 # Single quotes don't need escaping in long format
- if j>=0: ch = "\\" + '\\"abfrvtn'[j]
- elif ch not in "\n\t" and (ch < " " or ch > "}"):
- ch = "[[" + `ch` + "]]" #[2:-1] # Use python
- res = res + ch
- return delim + res + delim
-
-
-class N3Parser(Parser):
-
- def __init__(self):
- pass
-
- def parse(self, source, graph, encoding="utf-8"):
- # we're currently being handed a Graph, not a ConjunctiveGraph
- assert graph.store.context_aware # is this implied by formula_aware
- assert graph.store.formula_aware
-
- if encoding not in [None, "utf-8"]:
- raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)
-
- conj_graph = ConjunctiveGraph(store=graph.store)
- conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg
- # TODO: update N3Processor so that it can use conj_graph as the sink
- conj_graph.namespace_manager = graph.namespace_manager
- sink = RDFSink(conj_graph)
-
- baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
- p = SinkParser(sink, baseURI=baseURI)
-
- p.loadStream(source.getByteStream())
-
- for prefix, namespace in p._bindings.items():
- conj_graph.bind(prefix, namespace)
-
-
-
-
-def _test():
- import doctest
- doctest.testmod()
-
-
-# if __name__ == '__main__':
-# _test()
-
-def main():
- g=ConjunctiveGraph()
-
- sink = RDFSink(g)
- base = 'file://' + os.path.join(os.getcwd(), sys.argv[1])
-
- p = SinkParser(sink, baseURI=base)
- p._bindings[''] = p._baseURI + '#'
- p.startDoc()
-
- f = open(sys.argv[1], 'rb')
- bytes = f.read()
- f.close()
-
- p.feed(bytes)
- p.endDoc()
- for t in g.quads((None,None,None)):
-
- print t
-
-if __name__ == '__main__':
- main()
-
-#ends
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py b/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py
deleted file mode 100644
index fbb4a37..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/nquads.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""
-This is a rdflib plugin for parsing NQuad files into Conjunctive
-graphs that can be used and queried. The store that backs the graph
-*must* be able to handle contexts.
-
->>> from rdflib import ConjunctiveGraph, URIRef, Namespace
->>> g = ConjunctiveGraph()
->>> data = open("test/example.nquads", "rb")
->>> g.parse(data, format="nquads") # doctest:+ELLIPSIS
-<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
->>> assert len(g.store) == 449
->>> # There should be 16 separate contexts
->>> assert len([x for x in g.store.contexts()]) == 16
->>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009)
->>> # Looking for:
->>> # <http://bibliographica.org/entity/E10009> <http://xmlns.com/foaf/0.1/name> "Arco Publications" <http://bibliographica.org/entity/E10009>
->>> s = URIRef("http://bibliographica.org/entity/E10009")
->>> FOAF = Namespace("http://xmlns.com/foaf/0.1/")
->>> assert(g.value(s, FOAF.name) == "Arco Publications")
-"""
-
-from rdflib.py3compat import b
-
-# Build up from the NTriples parser:
-from rdflib.plugins.parsers.ntriples import NTriplesParser
-from rdflib.plugins.parsers.ntriples import ParseError
-from rdflib.plugins.parsers.ntriples import r_tail
-from rdflib.plugins.parsers.ntriples import r_wspace
-from rdflib.plugins.parsers.ntriples import r_wspaces
-
-__all__ = ['QuadSink', 'NQuadsParser']
-
-class QuadSink(object):
- def __init__(self):
- class FakeStore(object):
- def __init__(self, addn):
- self.addN = addn
- self.length = 0
- self.__quads = []
- self.__store = FakeStore(self.addN)
-
- def addN(self, quads):
- self.length += 1
- self.__quads.append(quads)
-
- def quads(self, (s,p,o)):
- for s,p,o,ctx in self.__quads:
- yield s,p,o,ctx
-
-class NQuadsParser(NTriplesParser):
- def __init__(self, sink=None):
- if sink is not None:
- assert sink.store.context_aware, ("NQuadsParser must be given"
- " a context aware store.")
- self.sink = sink
- else: self.sink = QuadSink()
-
- def parse(self, inputsource, sink, **kwargs):
- """Parse f as an N-Triples file."""
- assert sink.store.context_aware, ("NQuadsParser must be given"
- " a context aware store.")
- self.sink = sink
-
- source = inputsource.getByteStream()
-
- if not hasattr(source, 'read'):
- raise ParseError("Item to parse must be a file-like object.")
-
- self.file = source
- self.buffer = ''
- while True:
- self.line = self.readline()
- if self.line is None: break
- try: self.parseline()
- except ParseError:
- raise ParseError("Invalid line: %r" % self.line)
- return self.sink
-
- def context(self):
- context = self.uriref()
- if not context:
- raise ParseError("Context must be a uriref")
- return context
-
- def parseline(self):
- self.eat(r_wspace)
- if (not self.line) or self.line.startswith(b('#')):
- return # The line is empty or a comment
-
- subject = self.subject()
- self.eat(r_wspaces)
-
- predicate = self.predicate()
- self.eat(r_wspaces)
-
- obj = self.object()
- self.eat(r_wspaces)
-
- context = self.context()
- self.eat(r_tail)
-
- if self.line:
- raise ParseError("Trailing garbage")
- # Must have a context aware store - add on a normal Graph
- # discards anything where the ctx != graph.identifier
- self.sink.store.add((subject, predicate, obj), context)
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py b/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py
deleted file mode 100644
index 1ec2282..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/nt.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from rdflib.parser import Parser
-from rdflib.plugins.parsers.ntriples import NTriplesParser
-
-__all__ = ['NTSink', 'NTParser']
-
-class NTSink(object):
- def __init__(self, graph):
- self.graph = graph
-
- def triple(self, s, p, o):
- self.graph.add((s, p, o))
-
-
-class NTParser(Parser):
- """parser for the ntriples format, often stored with the .nt extension
-
- See http://www.w3.org/TR/rdf-testcases/#ntriples"""
-
- def __init__(self):
- super(NTParser, self).__init__()
-
- def parse(self, source, sink, baseURI=None):
- f = source.getByteStream() # TODO getCharacterStream?
- parser = NTriplesParser(NTSink(sink))
- parser.parse(f)
- f.close()
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py b/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py
deleted file mode 100644
index 48fe327..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/ntriples.py
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env python
-__doc__="""
-N-Triples Parser
-License: GPL 2, W3C, BSD, or MIT
-Author: Sean B. Palmer, inamidst.com
-"""
-
-import re
-from rdflib.term import URIRef as URI
-from rdflib.term import BNode as bNode
-from rdflib.term import Literal
-
-from rdflib.py3compat import b, cast_bytes
-
-__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser']
-
-uriref = b(r'<([^:]+:[^\s"<>]+)>')
-literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
-litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?')
-
-r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)'))
-r_wspace = re.compile(b(r'[ \t]*'))
-r_wspaces = re.compile(b(r'[ \t]+'))
-r_tail = re.compile(b(r'[ \t]*\.[ \t]*'))
-r_uriref = re.compile(uriref)
-r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)'))
-r_literal = re.compile(literal + litinfo)
-
-bufsiz = 2048
-validate = False
-
-class Node(unicode): pass
-
-class ParseError(Exception): pass
-
-class Sink(object):
- def __init__(self):
- self.length = 0
-
- def triple(self, s, p, o):
- self.length += 1
- print (s, p, o)
-
-quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'}
-r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)'))
-r_quot = re.compile(b(r'\\(t|n|r|"|\\)'))
-r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})'))
-
-def unquote(s):
- """Unquote an N-Triples string."""
- if not validate:
- return s.decode('unicode-escape')
- else:
- result = []
- while s:
- m = r_safe.match(s)
- if m:
- s = s[m.end():]
- result.append(m.group(1).decode('ascii'))
- continue
-
- m = r_quot.match(s)
- if m:
- s = s[2:]
- result.append(quot[m.group(1)])
- continue
-
- m = r_uniquot.match(s)
- if m:
- s = s[m.end():]
- u, U = m.groups()
- codepoint = int(u or U, 16)
- if codepoint > 0x10FFFF:
- raise ParseError("Disallowed codepoint: %08X" % codepoint)
- result.append(unichr(codepoint))
- elif s.startswith(b('\\')):
- raise ParseError("Illegal escape at: %s..." % s[:10])
- else: raise ParseError("Illegal literal character: %r" % s[0])
- return u''.join(result)
-
-r_hibyte = re.compile(ur'([\x80-\xFF])')
-
-def uriquote(uri):
- if not validate:
- return uri
- else:
- return r_hibyte.sub(
- lambda m: '%%%02X' % ord(m.group(1)), uri)
-
-class NTriplesParser(object):
- """An N-Triples Parser.
-
- Usage::
-
- p = NTriplesParser(sink=MySink())
- sink = p.parse(f) # file; use parsestring for a string
- """
-
- def __init__(self, sink=None):
- if sink is not None:
- self.sink = sink
- else: self.sink = Sink()
-
- def parse(self, f):
- """Parse f as an N-Triples file."""
- if not hasattr(f, 'read'):
- raise ParseError("Item to parse must be a file-like object.")
-
- self.file = f
- self.buffer = ''
- while True:
- self.line = self.readline()
- if self.line is None: break
- try: self.parseline()
- except ParseError:
- raise ParseError("Invalid line: %r" % self.line)
- return self.sink
-
- def parsestring(self, s):
- """Parse s as an N-Triples string."""
- if not isinstance(s, basestring):
- raise ParseError("Item to parse must be a string instance.")
- try:
- from io import BytesIO
- except ImportError:
- from cStringIO import StringIO as BytesIO
- f = BytesIO()
- f.write(cast_bytes(s))
- f.seek(0)
- self.parse(f)
-
- def readline(self):
- """Read an N-Triples line from buffered input."""
- # N-Triples lines end in either CRLF, CR, or LF
- # Therefore, we can't just use f.readline()
- if not self.buffer:
- buffer = self.file.read(bufsiz)
- if not buffer: return None
- self.buffer = buffer
-
- while True:
- m = r_line.match(self.buffer)
- if m: # the more likely prospect
- self.buffer = self.buffer[m.end():]
- return m.group(1)
- else:
- buffer = self.file.read(bufsiz)
- if not buffer and not self.buffer.isspace():
- raise ParseError("EOF in line")
- elif not buffer:
- return None
- self.buffer += buffer
-
- def parseline(self):
- self.eat(r_wspace)
- if (not self.line) or self.line.startswith(b('#')):
- return # The line is empty or a comment
-
- subject = self.subject()
- self.eat(r_wspaces)
-
- predicate = self.predicate()
- self.eat(r_wspaces)
-
- object = self.object()
- self.eat(r_tail)
-
- if self.line:
- raise ParseError("Trailing garbage")
- self.sink.triple(subject, predicate, object)
-
- def peek(self, token):
- return self.line.startswith(token)
-
- def eat(self, pattern):
- m = pattern.match(self.line)
- if not m: # @@ Why can't we get the original pattern?
- print(dir(pattern))
- print repr(self.line), type(self.line)
- raise ParseError("Failed to eat %s" % pattern)
- self.line = self.line[m.end():]
- return m
-
- def subject(self):
- # @@ Consider using dictionary cases
- subj = self.uriref() or self.nodeid()
- if not subj:
- raise ParseError("Subject must be uriref or nodeID")
- return subj
-
- def predicate(self):
- pred = self.uriref()
- if not pred:
- raise ParseError("Predicate must be uriref")
- return pred
-
- def object(self):
- objt = self.uriref() or self.nodeid() or self.literal()
- if objt is False:
- raise ParseError("Unrecognised object type")
- return objt
-
- def uriref(self):
- if self.peek(b('<')):
- uri = self.eat(r_uriref).group(1)
- uri = unquote(uri)
- uri = uriquote(uri)
- return URI(uri)
- return False
-
- def nodeid(self):
- if self.peek(b('_')):
- return bNode(self.eat(r_nodeid).group(1).decode())
- return False
-
- def literal(self):
- if self.peek(b('"')):
- lit, lang, dtype = self.eat(r_literal).groups()
- if lang:
- lang = lang.decode()
- else:
- lang = None
- if dtype:
- dtype = dtype.decode()
- else:
- dtype = None
- if lang and dtype:
- raise ParseError("Can't have both a language and a datatype")
- lit = unquote(lit)
- return Literal(lit, lang, dtype)
- return False
-
-# # Obsolete, unused
-# def parseURI(uri):
-# import urllib
-# parser = NTriplesParser()
-# u = urllib.urlopen(uri)
-# sink = parser.parse(u)
-# u.close()
-# # for triple in sink:
-# # print triple
-# print 'Length of input:', sink.length
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py
deleted file mode 100644
index 9553349..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""
-From a Python file, expecting an RDF/XML pretty printed output::
-
- import rdflib.graph as g
- graph = g.Graph()
- graph.parse('filename.html', format='rdfa')
- print graph.serialize(format='pretty-xml')
-
-For details on RDFa, the reader should consult the `RDFa syntax document`__.
-
-This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman
-
-.. __: http://www.w3.org/TR/rdfa-syntax
-.. __: http://www.w3.org/2007/08/pyRdfa/
-
-"""
-
-
-import sys
-import urllib
-import xml.dom.minidom
-
-from rdflib.term import URIRef
-from rdflib.parser import Parser
-from rdflib.plugins.parsers.rdfa.state import ExecutionContext
-from rdflib.plugins.parsers.rdfa.parse import parse_one_node
-from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph,
- DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA)
-
-from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform
-
-__all__ = ['RDFaParser']
-
-# These are part of the RDFa spec.
-BUILT_IN_TRANSFORMERS = [
- head_about_transform
-]
-
-# Exception handling. Essentially, all the different exceptions are re-packaged
-# into separate exception class, to allow for an easier management on the user
-# level
-class RDFaError(Exception) :
- """Just a wrapper around the local exceptions. It does not add any new
- functionality to the Exception class."""
- pass
-
-# For some doctype and element name combinations an automatic switch to an
-# input mode is done
-_HOST_LANG = {
- ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA,
- ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML
-}
-
-
-class RDFaParser(Parser):
-
- def parse(self, source, sink,
- warnings=False, space_preserve=True,
- transformers=None, xhtml=True, lax=True, html5=False, encoding=None):
- if transformers is None:
- transformers = []
- options = Options(warnings, space_preserve, transformers, xhtml, lax)
- baseURI = source.getPublicId()
- stream = source.getByteStream()
- if html5:
- dom = _process_html5_source(stream, options, encoding)
- else:
- dom = _try_process_source(stream, options, encoding)
- _process_DOM(dom, baseURI, sink, options)
-
-
-def _process_DOM(dom, base, graph, options=None):
- """
- Core processing. The transformers ("pre-processing") is done on the DOM
- tree, the state is initialized, and the "real" RDFa parsing is done.
- The result is put into the provided Graph.
-
- The real work is done in the parser function ``parse_one_node()``.
-
- Params:
- dom -- XML DOM Tree node (for the top level)
- base -- URI for the default "base" value (usually the URI of the file to be processed)
-
- Options:
- obj -- `Options` for the distiller
- raise RDFaError -- when called via CGI, this encapsulates the possible
- exceptions raised by the RDFLib serializer or the processing itself
- """
- html = dom.documentElement
- # Perform the built-in and external transformations on the HTML tree. This is,
- # in simulated form, the hGRDDL approach of Ben Adida.
- for trans in options.transformers + BUILT_IN_TRANSFORMERS:
- trans(html, options)
- # Collect the initial state. This takes care of things
- # like base, top level namespace settings, etc.
- # Ensure the proper initialization.
- state = ExecutionContext(html, graph, base=base, options=options)
- # The top level subject starts with the current document; this
- # is used by the recursion
- subject = URIRef(state.base)
- # Parse the whole thing recursively and fill the graph.
- parse_one_node(html, graph, subject, state, [])
- if options.comment_graph.graph != None:
- # Add the content of the comment graph to the output.
- graph.bind("dist", DIST_NS)
- for t in options.comment_graph.graph:
- graph.add(t)
-
-def _try_process_source(stream, options, encoding):
- """
- Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options
- while figuring out input..
-
- Returns a DOM tree.
- """
- parse = xml.dom.minidom.parse
- try:
- dom = parse(stream)
- # Try to second-guess the input type
- # This is _not_ really kosher, but the minidom is not really namespace aware...
- # In practice the goal is to have the system recognize svg content automatically
- # First see if there is a default namespace defined for the document:
- top = dom.documentElement
- if top.hasAttribute("xmlns"):
- key = (top.getAttribute("xmlns"), top.nodeName)
- if key in _HOST_LANG:
- options.host_language = _HOST_LANG[key]
- return dom
- except:
- # XML Parsing error in the input
- type, value, traceback = sys.exc_info()
- if options.host_language == GENERIC_XML or options.lax == False:
- raise RDFaError('Parsing error in input file: "%s"' % value)
-
- # XML Parsing error in the input
- msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value
- if options != None and options.warnings:
- options.comment_graph.add_warning(msg)
-
- # in Ivan's original code he reopened the stream if it was from urllib
- if isinstance(stream, urllib.addinfourl):
- stream = urllib.urlopen(stream.url)
-
- return _process_html5_source(stream, options, encoding)
-
-
-def _process_html5_source(stream, options, encoding):
- # Now try to see if and HTML5 parser is an alternative...
- try:
- from html5lib import HTMLParser, treebuilders
- except ImportError:
- # no alternative to the XHTML error, because HTML5 parser not available...
- msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>'
- raise RDFaError(msg2)
-
- parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
- parse = parser.parse
- try:
- dom = parse(stream, encoding)
- # The host language has changed
- options.host_language = HTML5_RDFA
- except:
- # Well, even the HTML5 parser could not do anything with this...
- (type, value, traceback) = sys.exc_info()
- msg2 = 'Parsing error in input file as HTML5: "%s"' % value
- raise RDFaError, msg2
-
- return dom
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py
deleted file mode 100644
index 4a9b015..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/embeddedrdf.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example
-by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
-
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-@contact: Ivan Herman, ivan@w3.org
-"""
-
-from StringIO import StringIO
-
-__all__ = ['handle_embeddedRDF']
-
-def handle_embeddedRDF(node, graph, state):
- """
- Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate
- base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing.
- @param node: a DOM node for the top level xml element
- @param graph: target rdf graph
- @type graph: RDFLib's Graph object instance
- @param state: the inherited state (namespaces, lang, etc)
- @type state: L{State.ExecutionContext}
- @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents.
- @rtype: Boolean
-
- """
- if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#":
- node.setAttribute("xml:base",state.base)
- rdf = StringIO(node.toxml())
- graph.parse(rdf)
- return True
- else:
- return False
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py
deleted file mode 100644
index 2ab9b44..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Implementation of the Literal handling. Details of the algorithm are described on
-U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
-
-@summary: RDFa Literal generation
-@requires: U{RDFLib package<http://rdflib.net>}
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-"""
-
-import re
-from rdflib.namespace import RDF
-from rdflib.term import Literal
-
-__all__ = ['generate_literal']
-
-XMLLiteral = RDF.XMLLiteral
-
-
-def __putBackEntities(str):
- """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string.
- Used by XML Literal
- @param str: string to be converted
- @return: string with entities
- @rtype: string
- """
- return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-
-#### The real meat...
-def generate_literal(node, graph, subject, state):
- """Generate the literal the C{@property}, taking into account datatype, etc.
- Note: this method is called only if the C{@property} is indeed present, no need to check.
-
- This method is an encoding of the algorithm documented
- U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
-
- The method returns a value whether the literal is a 'normal' literal (regardless of its datatype)
- or an XML Literal. The return value is True or False, respectively. This value is used to control whether
- the parser should stop recursion. This also means that that if the literal is generated from @content,
- the return value is False, regardless of the possible @datatype value.
-
- @param node: DOM element node
- @param graph: the (RDF) graph to add the properies to
- @param subject: the RDFLib URIRef serving as a subject for the generated triples
- @param state: the current state to be used for the CURIE-s
- @type state: L{State.ExecutionContext}
- @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value.
- @rtype: Boolean
- """
- def _get_literal(Pnode):
- """
- Get (recursively) the full text from a DOM Node.
-
- @param Pnode: DOM Node
- @return: string
- """
- rc = ""
- for node in Pnode.childNodes:
- if node.nodeType == node.TEXT_NODE:
- rc = rc + node.data
- elif node.nodeType == node.ELEMENT_NODE:
- rc = rc + _get_literal(node)
-
- # The decision of the group in February 2008 is not to normalize the result by default.
- # This is reflected in the default value of the option
- if state.options.space_preserve:
- return rc
- else:
- return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
- # end getLiteral
-
- def _get_XML_literal(Pnode):
- """
- Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
- via a C{node.toxml} call of the xml minidom implementation.)
-
- @param Pnode: DOM Node
- @return: string
- """
- def collectPrefixes(prefixes, node):
- def addPf(prefx, string):
- pf = string.split(':')[0]
- if pf != string and pf not in prefx : prefx.append(pf)
- # edn addPf
-
- # first the local name of the node
- addPf(prefixes, node.tagName)
- # get all the attributes and children
- for child in node.childNodes:
- if child.nodeType == node.ELEMENT_NODE:
- collectPrefixes(prefixes, child)
- elif child.nodeType == node.ATTRIBUTE_NODE:
- addPf(prefixes, node.child.name)
- # end collectPrefixes
-
- rc = ""
- prefixes = []
- for node in Pnode.childNodes:
- if node.nodeType == node.ELEMENT_NODE:
- collectPrefixes(prefixes, node)
-
- for node in Pnode.childNodes:
- if node.nodeType == node.TEXT_NODE:
- rc = rc + __putBackEntities(node.data)
- elif node.nodeType == node.ELEMENT_NODE:
- # Decorate the element with namespaces and lang values
- for prefix in prefixes:
- if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix):
- node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix])
- # Set the default namespace, if not done (and is available)
- if not node.getAttribute("xmlns") and state.defaultNS != None:
- node.setAttribute("xmlns", state.defaultNS)
- # Get the lang, if necessary
- if not node.getAttribute("xml:lang") and state.lang != None:
- node.setAttribute("xml:lang", state.lang)
- rc = rc + node.toxml()
- return rc
- # If XML Literals must be canonicalized for space, then this is the return line:
- #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
- # end getXMLLiteral
-
- # Most of the times the literal is a 'normal' one, ie, not an XML Literal
- retval = True
-
- # Get the Property URI-s
- props = state.get_resources(node.getAttribute("property"), prop=True)
-
- # Get, if exists, the value of @datatype, and figure out the language
- datatype = None
- dtset = False
- lang = state.lang
- if node.hasAttribute("datatype"):
- dtset = True
- dt = node.getAttribute("datatype")
- if dt != "":
- datatype = state.get_resource(dt)
- lang = None
-
- # The simple case: separate @content attribute
- if node.hasAttribute("content"):
- val = node.getAttribute("content")
- object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
- # The value of datatype has been set, and the keyword paramaters take care of the rest
- else:
- # see if there *is* a datatype (even if it is empty!)
- if dtset:
- # yep. The Literal content is the pure text part of the current element:
- # We have to check whether the specified datatype is, in fact, and
- # explicit XML Literal
- if datatype == XMLLiteral:
- object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
- retval = False
- else:
- object = Literal(_get_literal(node), datatype=datatype, lang=lang)
- else:
- # no controlling @datatype. We have to see if there is markup in the contained
- # element
- if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]:
- # yep, and XML Literal should be generated
- object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
- retval = False
- else:
- val = _get_literal(node)
- # At this point, there might be entities in the string that are returned as real characters by the dom
- # implementation. That should be turned back
- object = Literal(_get_literal(node), lang=lang)
-
- # NOTE: rdflib<2.5 didn't equal Literal with lang="", hence this check
- # proably always passed?
- # All tests pass with this check removed; going with that..
- ## The object may be empty, for example in an ill-defined <meta> element...
- if True:#object != "":
- for prop in props:
- graph.add((subject, prop, object))
-
- return retval
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/options.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/options.py
deleted file mode 100644
index 0329969..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/options.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-
-Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and
-handling of the extra Graph for warnings, informations, errors.
-
-
-@summary: RDFa parser (distiller)
-@requires: U{RDFLib<http://rdflib.net>}
-@requires: U{html5lib<http://code.google.com/p/html5lib/>} for the HTML5 parsing; note possible dependecies on Python's version on the project's web site
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-
-"""
-
-import sys
-from rdflib.graph import Graph
-from rdflib.term import BNode, Literal, URIRef
-from rdflib.namespace import Namespace
-
-__all__ = ['CommentGraph', 'Options']
-
-DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller"
-DIST_NS = DIST_URI + '#'
-
-ns_errors = Namespace(DIST_NS)
-distillerURI = URIRef(DIST_URI)
-
-WARNING = 'warning'
-ERROR = 'error'
-INFO = 'info'
-DEBUG = 'debug'
-
-_message_properties = {
- WARNING: ns_errors["warning"],
- ERROR: ns_errors["error"],
- INFO: ns_errors["information"],
- DEBUG: ns_errors["debug"]
-}
-
-def _add_to_comment_graph(graph, msg, prop, uri):
- """
- Add a distiller message to the graph.
-
- @param graph: RDFLib Graph
- @param msg: message of an exception
- @type msg: RDFLIb Literal
- @param prop: the property to be used
- @type prop: string, must be one of 'warning', 'error', 'info', 'debug'
- @param uri: the top URI used to invoke the distiller
- @type uri: URIRef
- """
- bnode = BNode()
- graph.add((distillerURI, _message_properties[prop], bnode))
- graph.add((bnode, ns_errors["onURI"], uri))
- graph.add((bnode, ns_errors["message"], msg))
-
-
-class CommentGraph(object):
- """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings,
- error messages, and informational messages.
- """
- def __init__(self, warnings = False):
- """
- @param warnings: whether a graph should effectively be set up, or whether this
- should just be an empty shell for the various calls to work (without effect)
- """
- if warnings:
- self.graph = Graph()
- else:
- self.graph = None
- self.accumulated_literals = []
- self.baseURI = None
-
- def _add_triple(self, msg, prop):
- obj = Literal(msg)
- if self.baseURI == None:
- self.accumulated_literals.append((obj,prop))
- elif self.graph != None:
- _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
-
- def set_base_URI(self, URI):
- """Set the base URI for the comment triples.
-
- Note that this method I{must} be called at some point to complete the triples. Without it the triples
- added via L{add_warning<CommentGraph.add_warning>}, L{add_info<CommentGraph.add_info>}, etc, will not be added to the final graph.
-
- @param URI: URIRef for the subject of the comments
- """
- self.baseURI = URI
- if self.graph != None:
- for obj, prop in self.accumulated_literals:
- _add_to_comment_graph(self.graph, obj, prop, self.baseURI)
- self.accumulated_literals = []
-
- def add_warning(self, txt):
- """Add a warning. A comment triplet is added to the separate "warning" graph.
- @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== "
- """
- self._add_triple(txt, WARNING)
-
- def add_info(self, txt):
- """Add an informational comment. A comment triplet is added to the separate "warning" graph.
- @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
- """
- self._add_triple(txt, INFO)
-
- def add_error(self, txt):
- """Add an error comment. A comment triplet is added to the separate "warning" graph.
- @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== "
- """
- self._add_triple(txt, ERROR)
-
- def _add_debug(self, txt):
- self._add_triple(txt, DEBUG)
-
-
-GENERIC_XML = 0
-XHTML_RDFA = 1
-HTML5_RDFA = 2
-
-class Options(object):
- """Settable options. An instance of this class is stored in
- the L{execution context<ExecutionContext>} of the parser.
-
- @ivar space_preserve: whether plain literals should preserve spaces at output or not
- @type space_preserve: Boolean
- @ivar comment_graph: Graph for the storage of warnings
- @type comment_graph: L{CommentGraph}
- @ivar warnings: whether warnings should be generated or not
- @type warnings: Boolean
- @ivar transformers: extra transformers
- @type transformers: list
- @type host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA
- @ivar host_language: integer (logically: an enumeration)
- @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
- @type lax: Boolean
- """
- def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False):
- """
- @param space_preserve: whether plain literals should preserve spaces at output or not
- @type space_preserve: Boolean
- @param warnings: whether warnings should be generated or not
- @type warnings: Boolean
- @param transformers: extra transformers
- @type transformers: list
- @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set ot HTML5_RDFA, depending on the value of the lax flag and the result of parsing.
- @type xhtml: Booelan
- @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time
- @type lax: Boolean
- """
- self.space_preserve = space_preserve
- self.transformers = transformers
- self.comment_graph = CommentGraph(warnings)
- self.warnings = warnings
- self.lax = lax
- if xhtml:
- self.host_language = XHTML_RDFA
- else:
- self.host_language = GENERIC_XML
-
- def __str__(self):
- retval = """Current options:
- space_preserve : %s
- warnings : %s
- lax parsing : %s
- host language : %s
- """
- return retval % (self.space_preserve, self.warnings, self.lax, self.host_language)
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/parse.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/parse.py
deleted file mode 100644
index d5b411f..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/parse.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-The core parsing function of RDFa. Some details are
-put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state).
-
-@summary: RDFa core parser processing step
-@requires: U{RDFLib package<http://rdflib.net>}
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-"""
-
-from rdflib.term import BNode, URIRef
-from rdflib.namespace import RDF
-
-from rdflib.plugins.parsers.rdfa.state import ExecutionContext
-from rdflib.plugins.parsers.rdfa.literal import generate_literal
-from rdflib.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF
-from rdflib.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA
-
-__all__ = ['parse_one_node']
-
-def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
- """The (recursive) step of handling a single node. See the
- U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.
-
- @param node: the DOM node to handle
- @param graph: the RDF graph
- @type graph: RDFLib's Graph object instance
- @param parent_object: the parent's object, as an RDFLib URIRef
- @param incoming_state: the inherited state (namespaces, lang, etc)
- @type incoming_state: L{State.ExecutionContext}
- @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
- by the current node.
- @return: whether the caller has to complete it's parent's incomplete triples
- @rtype: Boolean
- """
- def _get_resources_for_attr(attr):
- """Get a series of resources encoded via CURIE-s for an attribute on a specific node.
- @param attr: the name of the attribute
- @return: a list of RDFLib URIRef instances
- """
- if not node.hasAttribute(attr):
- return []
- else:
- rel = (attr == "rel") or (attr == "rev")
- prop = (attr == "property")
- return state.get_resources(node.getAttribute(attr), rel, prop)
-
- # Update the state. This means, for example, the possible local settings of
- # namespaces and lang
- state = ExecutionContext(node, graph, inherited_state=incoming_state)
-
- #---------------------------------------------------------------------------------
- # Handle the special case for embedded RDF, eg, in SVG1.2.
- # This may add some triples to the target graph that does not originate from RDFa parsing
- # If the function return TRUE, that means that an rdf:RDF has been found. No
- # RDFa parsing should be done on that subtree, so we simply return...
- if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state):
- return
-
- #---------------------------------------------------------------------------------
- # First, let us check whether there is anything to do at all. Ie,
- # whether there is any relevant RDFa specific attribute on the element
- #
- if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
- # nop, there is nothing to do here, just go down the tree and return...
- for n in node.childNodes:
- if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
- return
-
-
- #-----------------------------------------------------------------
- # The goal is to establish the subject and object for local processing
- # The behaviour is slightly different depending on the presense or not
- # of the @rel/@rev attributes
- current_subject = None
- current_object = None
-
- if _has_one_of_attributes(node, "rel", "rev"):
- # in this case there is the notion of 'left' and 'right' of @rel/@rev
- # in establishing the new Subject and the objectResource
-
- # set first the subject
- if node.hasAttribute("about"):
- current_subject = state.get_Curie_ref(node.getAttribute("about"))
- elif node.hasAttribute("src"):
- current_subject = state.get_URI_ref(node.getAttribute("src"))
- elif node.hasAttribute("typeof"):
- current_subject = BNode()
-
- # get_URI_ref may return None in case of an illegal Curie, so
- # we have to be careful here, not use only an 'else'
- if current_subject == None:
- current_subject = parent_object
-
- # set the object resource
- if node.hasAttribute("resource"):
- current_object = state.get_Curie_ref(node.getAttribute("resource"))
- elif node.hasAttribute("href"):
- current_object = state.get_URI_ref(node.getAttribute("href"))
- else:
- # in this case all the various 'resource' setting attributes
- # behave identically, except that their value might be different
- # in terms of CURIE-s and they also have their own priority, of course
- if node.hasAttribute("about"):
- current_subject = state.get_Curie_ref(node.getAttribute("about"))
- elif node.hasAttribute("src"):
- current_subject = state.get_URI_ref(node.getAttribute("src"))
- elif node.hasAttribute("resource"):
- current_subject = state.get_Curie_ref(node.getAttribute("resource"))
- elif node.hasAttribute("href"):
- current_subject = state.get_URI_ref(node.getAttribute("href"))
- elif node.hasAttribute("typeof"):
- current_subject = BNode()
-
- # get_URI_ref may return None in case of an illegal Curie, so
- # we have to be careful here, not use only an 'else'
- if current_subject == None:
- current_subject = parent_object
-
- # in this case no non-literal triples will be generated, so the
- # only role of the current_objectResource is to be transferred to
- # the children node
- current_object = current_subject
-
- # ---------------------------------------------------------------------
- # The possible typeof indicates a number of type statements on the newSubject
- for defined_type in _get_resources_for_attr("typeof"):
- graph.add((current_subject, RDF.type, defined_type))
-
- # ---------------------------------------------------------------------
- # In case of @rel/@rev, either triples or incomplete triples are generated
- # the (possible) incomplete triples are collected, to be forwarded to the children
- incomplete_triples = []
- for prop in _get_resources_for_attr("rel"):
- theTriple = (current_subject, prop, current_object)
- if current_object != None:
- graph.add(theTriple)
- else:
- incomplete_triples.append(theTriple)
- for prop in _get_resources_for_attr("rev"):
- theTriple = (current_object, prop, current_subject)
- if current_object != None:
- graph.add(theTriple)
- else:
- incomplete_triples.append(theTriple)
-
- # ----------------------------------------------------------------------
- # Generation of the literal values. The newSubject is the subject
- # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
- # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later
- # in the parsing process).
- if node.hasAttribute("property"):
- # Generate the literal. It has been put it into a separate module to make it more managable
- # the overall return value should be set to true if any valid triple has been generated
- recurse = generate_literal(node, graph, current_subject, state)
- else:
- recurse = True
-
- # ----------------------------------------------------------------------
- # Setting the current object to a bnode is setting up a possible resource
- # for the incomplete triples downwards
- if current_object == None:
- object_to_children = BNode()
- else:
- object_to_children = current_object
-
- #-----------------------------------------------------------------------
- # Here is the recursion step for all the children
- if recurse:
- for n in node.childNodes:
- if n.nodeType == node.ELEMENT_NODE:
- parse_one_node(n, graph, object_to_children, state, incomplete_triples)
-
- # ---------------------------------------------------------------------
- # At this point, the parent's incomplete triples may be completed
- for s, p, o in parent_incomplete_triples:
- if s == None: s = current_subject
- if o == None: o = current_subject
- graph.add((s, p, o))
-
- # -------------------------------------------------------------------
- # This should be it...
- # -------------------------------------------------------------------
- return
-
-
-def _has_one_of_attributes(node, *args):
- """
- Check whether one of the listed attributes is present on a (DOM) node.
- @param node: DOM element node
- @param args: possible attribute names
- @return: True or False
- @rtype: Boolean
- """
- return True in [ node.hasAttribute(attr) for attr in args ]
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/state.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/state.py
deleted file mode 100644
index 31caf41..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/state.py
+++ /dev/null
@@ -1,434 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Parser's execution context (a.k.a. state) object and handling. The state includes:
-
- - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances
- - language, retrieved from C{@xml:lang}
- - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed.
- - options, in the form of an L{Options<pyRdfa.Options>} instance
-
-The execution context object is also used to turn relative URI-s and CURIES into real URI references.
-
-@summary: RDFa core parser processing step
-@requires: U{RDFLib package<http://rdflib.net>}
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-
-@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace
-@var XHTML_URI: URI prefix of the XHTML vocabulary
-@var RDFa_PROFILE: the official RDFa profile URI
-@var RDFa_VERSION: the official version string of RDFa
-@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected)
-@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s.
-@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.)
-@var __bnodes: dictionary of blank node names to real blank node
-@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}".
-"""
-
-from rdflib.namespace import Namespace, RDF, RDFS
-from rdflib.term import BNode, URIRef
-from rdflib.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA
-
-import re
-import random
-import urlparse
-
-__all__ = ['ExecutionContext']
-
-RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab"
-RDFa_VERSION = "XHTML+RDFa 1.0"
-RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN"
-RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"
-
-usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"]
-
-####Predefined @rel/@rev/@property values
-# predefined values for the @rel and @rev values. These are considered to be part of a specific
-# namespace, defined by the RDFa document.
-# At the moment, there are no predefined @property values, but the code is there in case
-# some will be defined
-XHTML_PREFIX = "xhv"
-XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#"
-
-_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents',
-'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev',
-'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top']
-
-_predefined_property = []
-
-#### Managing blank nodes for CURIE-s
-__bnodes = {}
-__empty_bnode = BNode()
-def _get_bnode_from_Curie(var):
- """
- 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used
- before, then the corresponding BNode is returned; otherwise a new BNode is created and
- associated to that value.
- @param var: CURIE BNode identifier
- @return: BNode
- """
- if len(var) == 0:
- return __empty_bnode
- if var in __bnodes:
- return __bnodes[var]
- else:
- retval = BNode()
- __bnodes[var] = retval
- return retval
-
-#### Quote URI-s
-import urllib
-# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other
-# special characters are converted to their %.. equivalents for namespace prefixes
-_unquotedChars = ':/\?=#'
-_warnChars = [' ', '\n', '\r', '\t']
-def _quote(uri, options):
- """
- 'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
- may stay as they are (listed in L{_unquotedChars}. If one of the characters listed in L{_warnChars}
- is also in the uri, an extra warning is also generated.
- @param uri: URI
- @param options:
- @type options: L{Options<pyRdfa.Options>}
- """
- suri = uri.strip()
- for c in _warnChars:
- if suri.find(c) != -1:
- if options != None:
- options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri)
- break
- return urllib.quote(suri, _unquotedChars)
-
-
-#### Core Class definition
-class ExecutionContext(object):
- """State at a specific node, including the current set
- of namespaces in the RDFLib sense, the
- current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce
- URI references for RDFLib.
-
- @ivar options: reference to the overall options
- @type ivar: L{Options.Options}
- @ivar base: the 'base' URI
- @ivar defaultNS: default namespace
- @ivar lang: language tag (possibly None)
- @ivar ns: dictionary of namespaces
- @type ns: dictionary, each value is an RDFLib Namespace object
-
- """
- def __init__(self, node, graph, inherited_state=None, base="", options=None):
- """
- @param node: the current DOM Node
- @param graph: the RDFLib Graph
- @keyword inherited_state: the state as inherited
- from upper layers. This inherited_state is mixed with the state information
- retrieved from the current node.
- @type inherited_state: L{State.ExecutionContext}
- @keyword base: string denoting the base URI for the specific node. This overrides the possible
- base inherited from the upper layers. The
- current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is
- necessary for SVG (and other possible XML dialects that accept C{@xml:base})
- @keyword options: invocation option
- @type options: L{Options<pyRdfa.Options>}
- """
- #-----------------------------------------------------------------
- # settling the base
- # note that, strictly speaking, it is not necessary to add the base to the
- # context, because there is only one place to set it (<base> element of the <header>).
- # It is done because it is prepared for a possible future change in direction of
- # accepting xml:base on each element.
- # At the moment, it is invoked with a 'None' at the top level of parsing, that is
- # when the <base> element is looked for.
- if inherited_state:
- self.base = inherited_state.base
- self.options = inherited_state.options
- # for generic XML versions the xml:base attribute should be handled
- if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
- self.base = node.getAttribute("xml:base")
- else:
- # this is the branch called from the very top
- self.base = ""
- for bases in node.getElementsByTagName("base"):
- if bases.hasAttribute("href"):
- self.base = bases.getAttribute("href")
- continue
- if self.base == "":
- self.base = base
-
- # this is just to play safe. I believe this branch should actually not happen...
- if options == None:
- from pyRdfa import Options
- self.options = Options()
- else:
- self.options = options
-
- # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2
- if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"):
- self.base = node.getAttribute("xml:base")
-
- self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options)))
-
- # check the the presense of the @profile and or @version attribute for the RDFa profile...
- # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG)
- if self.options.host_language != GENERIC_XML:
- doctype = None
- try:
- # I am not 100% sure the HTML5 minidom implementation has this, so let us just be
- # cautious here...
- doctype = node.ownerDocument.doctype
- except:
- pass
- if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ):
- # next level: check the version
- html = node.ownerDocument.documentElement
- if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ):
- # see if least the profile has been set
- # Find the <head> element
- head = None
- for index in range(0, html.childNodes.length-1):
- if html.childNodes.item(index).nodeName == "head":
- head = html.childNodes.item(index)
- break
- if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ):
- if self.options.host_language == HTML5_RDFA:
- self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...")
- else:
- self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.")
-
- #-----------------------------------------------------------------
- # Stripping the fragment ID from the base URI, as demanded by RFC 3986
- self.base = urlparse.urldefrag(self.base)[0]
-
- #-----------------------------------------------------------------
- # Settling the language tags
- # check first the lang or xml:lang attribute
- # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang;
- # I just want to be prepared here...
- if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"):
- self.lang = node.getAttribute("lang")
- if len(self.lang) == 0 : self.lang = None
- elif node.hasAttribute("xml:lang"):
- self.lang = node.getAttribute("xml:lang")
- if len(self.lang) == 0 : self.lang = None
- elif inherited_state:
- self.lang = inherited_state.lang
- else:
- self.lang = None
-
- #-----------------------------------------------------------------
- # Handling namespaces
- # First get the local xmlns declarations/namespaces stuff.
- dict = {}
- for i in range(0, node.attributes.length):
- attr = node.attributes.item(i)
- if attr.name.find('xmlns:') == 0 :
- # yep, there is a namespace setting
- key = attr.localName
- if key != "" : # exclude the top level xmlns setting...
- if key == "_":
- if warning: self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" )
- elif key.find(':') != -1:
- if warning: self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" )
- else :
- # quote the URI, ie, convert special characters into %.. This is
- # true, for example, for spaces
- uri = _quote(attr.value, self.options)
- # 1. create a new Namespace entry
- ns = Namespace(uri)
- # 2. 'bind' it in the current graph to
- # get a nicer output
- graph.bind(key, uri)
- # 3. Add an entry to the dictionary
- dict[key] = ns
-
- # See if anything has been collected at all.
- # If not, the namespaces of the incoming state is
- # taken over
- self.ns = {}
- if len(dict) == 0 and inherited_state:
- self.ns = inherited_state.ns
- else:
- if inherited_state:
- for k in inherited_state.ns : self.ns[k] = inherited_state.ns[k]
- # copying the newly found namespace, possibly overwriting
- # incoming values
- for k in dict : self.ns[k] = dict[k]
- else:
- self.ns = dict
-
- # see if the xhtml core vocabulary has been set
- self.xhtml_prefix = None
- for key in self.ns.keys():
- if XHTML_URI == str(self.ns[key]):
- self.xhtml_prefix = key
- break
- if self.xhtml_prefix == None:
- if XHTML_PREFIX not in self.ns:
- self.ns[XHTML_PREFIX] = Namespace(XHTML_URI)
- self.xhtml_prefix = XHTML_PREFIX
- else:
- # the most disagreeable thing, the user has used
- # the prefix for something else...
- self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000))
- self.ns[self.xhtml_prefix] = Namespace(XHTML_URI)
- graph.bind(self.xhtml_prefix, XHTML_URI)
-
- # extra tricks for unusual usages...
- # if the 'rdf' prefix is not used, it is artificially added...
- if "rdf" not in self.ns:
- self.ns["rdf"] = RDF
- if "rdfs" not in self.ns:
- self.ns["rdfs"] = RDFS
-
- # Final touch: setting the default namespace...
- if node.hasAttribute("xmlns"):
- self.defaultNS = node.getAttribute("xmlns")
- elif inherited_state and inherited_state.defaultNS != None:
- self.defaultNS = inherited_state.defaultNS
- else:
- self.defaultNS = None
-
- def _get_predefined_rels(self, val, warning):
- """Get the predefined URI value for the C{@rel/@rev} attribute.
- @param val: attribute name
- @param warning: whether a warning should be generated or not
- @type warning: boolean
- @return: URIRef for the predefined URI (or None)
- """
- vv = val.strip().lower()
- if vv in _predefined_rel:
- return self.ns[self.xhtml_prefix][vv]
- else:
- if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val)
- return None
-
- def _get_predefined_properties(self, val, warning):
- """Get the predefined value for the C{@property} attribute.
- @param val: attribute name
- @param warning: whether a warning should be generated or not
- @type warning: boolean
- @return: URIRef for the predefined URI (or None)
- """
- vv = val.strip().lower()
- if vv in _predefined_property:
- return self.ns[self.xhtml_prefix][vv]
- else:
- if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val)
- return None
-
- def get_resource(self, val, rel=False, prop=False, warning=True):
- """Get a resource for a CURIE.
- The input argument is a CURIE; this is interpreted
- via the current namespaces and the corresponding URI Reference is returned
- @param val: string of the form "prefix:lname"
- @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
- @keyword prop: whether the predefined C{@property} values should also be interpreted
- @return: an RDFLib URIRef instance (or None)
- """
- if val == "":
- return None
- elif val.find(":") != -1:
- key = val.split(":", 1)[0]
- lname = val.split(":", 1)[1]
- if key == "_":
- # A possible error: this method is invoked for property URI-s, which
- # should not refer to a blank node. This case is checked and a possible
- # error condition is handled
- self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname)
- return None
- if key == "":
- # This is the ":blabla" case
- key = self.xhtml_prefix
- else:
- # if the resources correspond to a @rel or @rev or @property, then there
- # may be one more possibility here, namely that it is one of the
- # predefined values
- if rel:
- return self._get_predefined_rels(val, warning)
- elif prop:
- return self._get_predefined_properties(val, warning)
- else:
- self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val)
- return None
-
- if key not in self.ns:
- self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key)
- return None
- else:
- if lname == "":
- return URIRef(str(self.ns[key]))
- else:
- return self.ns[key][lname]
-
- def get_resources(self, val, rel=False, prop=False):
- """Get a series of resources encoded in CURIE-s.
- The input argument is a list of CURIE-s; these are interpreted
- via the current namespaces and the corresponding URI References are returned.
- @param val: strings of the form prefix':'lname, separated by space
- @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted
- @keyword prop: whether the predefined C{@property} values should also be interpreted
- @return: a list of RDFLib URIRef instances (possibly empty)
- """
- val.strip()
- resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ]
- return [ r for r in resources if r != None ]
-
- def get_URI_ref(self, val):
- """Create a URI RDFLib resource for a URI.
- The input argument is a URI. It is checked whether it is a local
- reference with a '#' or not. If yes, a URIRef combined with the
- stored base value is returned. In both cases a URIRef for a full URI is created
- and returned
- @param val: URI string
- @return: an RDFLib URIRef instance
- """
- if val == "":
- return URIRef(self.base)
- elif val[0] == '[' and val[-1] == ']':
- self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val)
- return None
- else:
- return URIRef(urlparse.urljoin(self.base, val))
-
- def get_Curie_ref(self, val):
- """Create a URI RDFLib resource for a CURIE.
- The input argument is a CURIE. This means that it is:
- - either of the form [a:b] where a:b should be resolved as an
- 'unprotected' CURIE, or
- - it is a traditional URI (relative or absolute)
-
- If the second case the URI value is also compared to 'usual' URI
- protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}).
- If there is no match, a warning is generated (indeed, a frequent
- mistake in authoring RDFa is to forget the '[' and ']' characters to
- "protect" CURIE-s.)
-
- @param val: CURIE string
- @return: an RDFLib URIRef instance
- """
- if len(val) == 0:
- return URIRef(self.base)
- elif val[0] == "[":
- if val[-1] == "]":
- curie = val[1:-1]
- # A possible Blank node reference should be separated here:
- if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":":
- return _get_bnode_from_Curie(curie[2:])
- else:
- return self.get_resource(val[1:-1])
- else:
- # illegal CURIE...
- self.options.comment_graph.add_error("Illegal CURIE: %s" % val)
- return None
- else:
- # check the value, to see if an error may have been made...
- # Usual protocol values in the URI
- v = val.strip().lower()
- protocol = urlparse.urlparse(val)[0]
- if protocol != "" and protocol not in usual_protocols:
- err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val
- self.options.comment_graph.add_warning(err)
- return self.get_URI_ref(val)
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/__init__.py
+++ /dev/null
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py
deleted file mode 100644
index 0cf8f7a..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/transform/headabout.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Simple transfomer: the C{@about=""} is added to the C{<head>} and C{<body>} elements (unless something is already there).
-Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax.
-
-@summary: Add a top "about" to <head> and <body>
-@requires: U{RDFLib package<http://rdflib.net>}
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-@contact: Ivan Herman, ivan@w3.org
-"""
-
-def head_about_transform(html, options):
- """
- @param html: a DOM node for the top level html element
- @param options: invocation options
- @type options: L{Options<pyRdfa.Options>}
- """
- for top in html.getElementsByTagName("head"):
- if not top.hasAttribute("about"):
- top.setAttribute("about", "")
- for top in html.getElementsByTagName("body"):
- if not top.hasAttribute("about"):
- top.setAttribute("about", "")
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
deleted file mode 100644
index 00e8d6a..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfxml.py
+++ /dev/null
@@ -1,579 +0,0 @@
-# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-#
-# * Neither the name of Daniel Krech nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-"""
-from xml.sax import make_parser
-from xml.sax.handler import ErrorHandler
-from xml.sax.saxutils import handler, quoteattr, escape
-from urlparse import urljoin, urldefrag
-
-from rdflib.namespace import RDF, is_ncname
-from rdflib.term import URIRef
-from rdflib.term import BNode
-from rdflib.term import Literal
-from rdflib.exceptions import ParserError, Error
-from rdflib.parser import Parser
-
-__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser']
-
-RDFNS = RDF
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
-# A mapping from unqualified terms to there qualified version.
-UNQUALIFIED = {"about" : RDF.about,
- "ID" : RDF.ID,
- "type" : RDF.type,
- "resource": RDF.resource,
- "parseType": RDF.parseType}
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
-CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
-SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
-OLD_TERMS = [
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
- URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
-
-NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS
-NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
-
-PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS
-PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
-PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
-
-XMLNS = "http://www.w3.org/XML/1998/namespace"
-BASE = (XMLNS, "base")
-LANG = (XMLNS, "lang")
-
-
-class BagID(URIRef):
- __slots__ = ['li']
- def __init__(self, val):
- super(URIRef, self).__init__(val)
- self.li = 0
-
- def next_li(self):
- self.li += 1
- return RDFNS[self.li]
-
-
-class ElementHandler(object):
- __slots__ = ['start', 'char', 'end', 'li', 'id',
- 'base', 'subject', 'predicate', 'object',
- 'list', 'language', 'datatype', 'declared', 'data']
- def __init__(self):
- self.start = None
- self.char = None
- self.end = None
- self.li = 0
- self.id = None
- self.base = None
- self.subject = None
- self.object = None
- self.list = None
- self.language = None
- self.datatype = None
- self.declared = None
- self.data = None
-
- def next_li(self):
- self.li += 1
- return RDFNS[self.li]
-
-
-class RDFXMLHandler(handler.ContentHandler):
-
- def __init__(self, store):
- self.store = store
- self.preserve_bnode_ids = False
- self.reset()
-
- def reset(self):
- document_element = ElementHandler()
- document_element.start = self.document_element_start
- document_element.end = lambda name, qname: None
- self.stack = [None, document_element,]
- self.ids = {} # remember IDs we have already seen
- self.bnode = {}
- self._ns_contexts = [{}] # contains uri -> prefix dicts
- self._current_context = self._ns_contexts[-1]
-
- # ContentHandler methods
-
- def setDocumentLocator(self, locator):
- self.locator = locator
-
- def startDocument(self):
- pass
-
- def startPrefixMapping(self, prefix, namespace):
- self._ns_contexts.append(self._current_context.copy())
- self._current_context[namespace] = prefix
- self.store.bind(prefix, URIRef(namespace), override=False)
-
- def endPrefixMapping(self, prefix):
- self._current_context = self._ns_contexts[-1]
- del self._ns_contexts[-1]
-
- def startElementNS(self, name, qname, attrs):
- stack = self.stack
- stack.append(ElementHandler())
- current = self.current
- parent = self.parent
- base = attrs.get(BASE, None)
- if base is not None:
- base, frag = urldefrag(base)
- if parent and parent.base:
- base = urljoin(parent.base, base)
- else:
- systemId = self.locator.getPublicId() or self.locator.getSystemId()
- if systemId:
- base = urljoin(systemId, base)
- else:
- if parent:
- base = parent.base
- if base is None:
- systemId = self.locator.getPublicId() or self.locator.getSystemId()
- if systemId:
- base, frag = urldefrag(systemId)
- current.base = base
- language = attrs.get(LANG, None)
- if language is None:
- if parent:
- language = parent.language
- current.language = language
- current.start(name, qname, attrs)
-
- def endElementNS(self, name, qname):
- self.current.end(name, qname)
- self.stack.pop()
-
- def characters(self, content):
- char = self.current.char
- if char:
- char(content)
-
- def ignorableWhitespace(self, content):
- pass
-
- def processingInstruction(self, target, data):
- pass
-
- def add_reified(self, sid, (s, p, o)):
- self.store.add((sid, RDF.type, RDF.Statement))
- self.store.add((sid, RDF.subject, s))
- self.store.add((sid, RDF.predicate, p))
- self.store.add((sid, RDF.object, o))
-
- def error(self, message):
- locator = self.locator
- info = "%s:%s:%s: " % (locator.getSystemId(),
- locator.getLineNumber(), locator.getColumnNumber())
- raise ParserError(info + message)
-
- def get_current(self):
- return self.stack[-2]
- # Create a read only property called current so that self.current
- # give the current element handler.
- current = property(get_current)
-
- def get_next(self):
- return self.stack[-1]
- # Create a read only property that gives the element handler to be
- # used for the next element.
- next = property(get_next)
-
- def get_parent(self):
- return self.stack[-3]
- # Create a read only property that gives the current parent
- # element handler
- parent = property(get_parent)
-
- def absolutize(self, uri):
- result = urljoin(self.current.base, uri, allow_fragments=1)
- if uri and uri[-1]=="#" and result[-1]!="#":
- result = "%s#" % result
- return URIRef(result)
-
- def convert(self, name, qname, attrs):
- if name[0] is None:
- name = URIRef(name[1])
- else:
- name = URIRef("".join(name))
- atts = {}
- for (n, v) in attrs.items(): #attrs._attrs.iteritems(): #
- if n[0] is None:
- att = URIRef(n[1])
- else:
- att = URIRef("".join(n))
- if att.startswith(XMLNS) or att[0:3].lower()=="xml":
- pass
- elif att in UNQUALIFIED:
- #if not RDFNS[att] in atts:
- atts[RDFNS[att]] = v
- else:
- atts[URIRef(att)] = v
- return name, atts
-
- def document_element_start(self, name, qname, attrs):
- if name[0] and URIRef("".join(name)) == RDF.RDF:
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- next.start = self.node_element_start
- next.end = self.node_element_end
- else:
- self.node_element_start(name, qname, attrs)
- #self.current.end = self.node_element_end
- # TODO... set end to something that sets start such that
- # another element will cause error
-
-
- def node_element_start(self, name, qname, attrs):
- name, atts = self.convert(name, qname, attrs)
- current = self.current
- absolutize = self.absolutize
-
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- next.start = self.property_element_start
- next.end = self.property_element_end
-
- if name in NODE_ELEMENT_EXCEPTIONS:
- self.error("Invalid node element URI: %s" % name)
-
- if RDF.ID in atts:
- if RDF.about in atts or RDF.nodeID in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
-
- id = atts[RDF.ID]
- if not is_ncname(id):
- self.error("rdf:ID value is not a valid NCName: %s" % id)
- subject = absolutize("#%s" % id)
- if subject in self.ids:
- self.error("two elements cannot use the same ID: '%s'" % subject)
- self.ids[subject] = 1 # IDs can only appear once within a document
- elif RDF.nodeID in atts:
- if RDF.ID in atts or RDF.about in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
- nodeID = atts[RDF.nodeID]
- if not is_ncname(nodeID):
- self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
- if self.preserve_bnode_ids is False:
- if nodeID in self.bnode:
- subject = self.bnode[nodeID]
- else:
- subject = BNode()
- self.bnode[nodeID] = subject
- else:
- subject = BNode(nodeID)
- elif RDF.about in atts:
- if RDF.ID in atts or RDF.nodeID in atts:
- self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
- subject = absolutize(atts[RDF.about])
- else:
- subject = BNode()
-
- if name!=RDF.Description: # S1
- self.store.add((subject, RDF.type, absolutize(name)))
-
- language = current.language
- for att in atts:
- if not att.startswith(str(RDFNS)):
- predicate = absolutize(att)
- try:
- object = Literal(atts[att], language)
- except Error, e:
- self.error(e.msg)
- elif att==RDF.type: #S2
- predicate = RDF.type
- object = absolutize(atts[RDF.type])
- elif att in NODE_ELEMENT_ATTRIBUTES:
- continue
- elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
- self.error("Invalid property attribute URI: %s" % att)
- continue # for when error does not throw an exception
- else:
- predicate = absolutize(att)
- try:
- object = Literal(atts[att], language)
- except Error, e:
- self.error(e.msg)
- self.store.add((subject, predicate, object))
-
- current.subject = subject
-
-
- def node_element_end(self, name, qname):
- self.parent.object = self.current.subject
-
- def property_element_start(self, name, qname, attrs):
- name, atts = self.convert(name, qname, attrs)
- current = self.current
- absolutize = self.absolutize
-
- # Cheap hack so 2to3 doesn't turn it into __next__
- next = getattr(self, 'next')
- object = None
- current.data = None
- current.list = None
-
- if not name.startswith(str(RDFNS)):
- current.predicate = absolutize(name)
- elif name==RDF.li:
- current.predicate = current.next_li()
- elif name in PROPERTY_ELEMENT_EXCEPTIONS:
- self.error("Invalid property element URI: %s" % name)
- else:
- current.predicate = absolutize(name)
-
- id = atts.get(RDF.ID, None)
- if id is not None:
- if not is_ncname(id):
- self.error("rdf:ID value is not a value NCName: %s" % id)
- current.id = absolutize("#%s" % id)
- else:
- current.id = None
-
- resource = atts.get(RDF.resource, None)
- nodeID = atts.get(RDF.nodeID, None)
- parse_type = atts.get(RDF.parseType, None)
- if resource is not None and nodeID is not None:
- self.error("Property element cannot have both rdf:nodeID and rdf:resource")
- if resource is not None:
- object = absolutize(resource)
- next.start = self.node_element_start
- next.end = self.node_element_end
- elif nodeID is not None:
- if not is_ncname(nodeID):
- self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
- if self.preserve_bnode_ids is False:
- if nodeID in self.bnode:
- object = self.bnode[nodeID]
- else:
- subject = BNode()
- self.bnode[nodeID] = subject
- object = subject
- else:
- object = subject = BNode(nodeID)
- next.start = self.node_element_start
- next.end = self.node_element_end
- else:
- if parse_type is not None:
- for att in atts:
- if att!=RDF.parseType and att!=RDF.ID:
- self.error("Property attr '%s' now allowed here" % att)
- if parse_type=="Resource":
- current.subject = object = BNode()
- current.char = self.property_element_char
- next.start = self.property_element_start
- next.end = self.property_element_end
- elif parse_type=="Collection":
- current.char = None
- object = current.list = RDF.nil #BNode()#self.parent.subject
- next.start = self.node_element_start
- next.end = self.list_node_element_end
- else: #if parse_type=="Literal":
- # All other values are treated as Literal
- # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
- object = Literal("", datatype=RDF.XMLLiteral)
- current.char = self.literal_element_char
- current.declared = {}
- next.start = self.literal_element_start
- next.char = self.literal_element_char
- next.end = self.literal_element_end
- current.object = object
- return
- else:
- object = None
- current.char = self.property_element_char
- next.start = self.node_element_start
- next.end = self.node_element_end
-
- datatype = current.datatype = atts.get(RDF.datatype, None)
- language = current.language
- if datatype is not None:
- # TODO: check that there are no atts other than datatype and id
- datatype = absolutize(datatype)
- else:
- for att in atts:
- if not att.startswith(str(RDFNS)):
- predicate = absolutize(att)
- elif att in PROPERTY_ELEMENT_ATTRIBUTES:
- continue
- elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
- self.error("""Invalid property attribute URI: %s""" % att)
- else:
- predicate = absolutize(att)
-
- if att==RDF.type:
- o = URIRef(atts[att])
- else:
- if datatype is not None:
- language = None
- o = Literal(atts[att], language, datatype)
-
- if object is None:
- object = BNode()
- self.store.add((object, predicate, o))
- if object is None:
- current.data = ""
- current.object = None
- else:
- current.data = None
- current.object = object
-
- def property_element_char(self, data):
- current = self.current
- if current.data is not None:
- current.data += data
-
- def property_element_end(self, name, qname):
- current = self.current
- if current.data is not None and current.object is None:
- literalLang = current.language
- if current.datatype is not None:
- literalLang = None
- current.object = Literal(current.data, literalLang, current.datatype)
- current.data = None
- if self.next.end==self.list_node_element_end:
- if current.object!=RDF.nil:
- self.store.add((current.list, RDF.rest, RDF.nil))
- if current.object is not None:
- self.store.add((self.parent.subject, current.predicate, current.object))
- if current.id is not None:
- self.add_reified(current.id, (self.parent.subject,
- current.predicate, current.object))
- current.subject = None
-
- def list_node_element_end(self, name, qname):
- current = self.current
- if self.parent.list==RDF.nil:
- list = BNode()
- # Removed between 20030123 and 20030905
- #self.store.add((list, RDF.type, LIST))
- self.parent.list = list
- self.store.add((self.parent.list, RDF.first, current.subject))
- self.parent.object = list
- self.parent.char = None
- else:
- list = BNode()
- # Removed between 20030123 and 20030905
- #self.store.add((list, RDF.type, LIST))
- self.store.add((self.parent.list, RDF.rest, list))
- self.store.add((list, RDF.first, current.subject))
- self.parent.list = list
-
- def literal_element_start(self, name, qname, attrs):
- current = self.current
- self.next.start = self.literal_element_start
- self.next.char = self.literal_element_char
- self.next.end = self.literal_element_end
- current.declared = self.parent.declared.copy()
- if name[0]:
- prefix = self._current_context[name[0]]
- if prefix:
- current.object = "<%s:%s" % (prefix, name[1])
- else:
- current.object = "<%s" % name[1]
- if not name[0] in current.declared:
- current.declared[name[0]] = prefix
- if prefix:
- current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
- else:
- current.object += (' xmlns="%s"' % name[0])
- else:
- current.object = "<%s" % name[1]
-
- for (name, value) in attrs.items():
- if name[0]:
- if not name[0] in current.declared:
- current.declared[name[0]] = self._current_context[name[0]]
- name = current.declared[name[0]] + ":" + name[1]
- else:
- name = name[1]
- current.object += (' %s=%s' % (name, quoteattr(value)))
- current.object += ">"
-
- def literal_element_char(self, data):
- self.current.object += escape(data)
-
- def literal_element_end(self, name, qname):
- if name[0]:
- prefix = self._current_context[name[0]]
- if prefix:
- end = u"</%s:%s>" % (prefix, name[1])
- else:
- end = u"</%s>" % name[1]
- else:
- end = u"</%s>" % name[1]
- self.parent.object += self.current.object + end
-
-
-def create_parser(target, store):
- parser = make_parser()
- try:
- # Workaround for bug in expatreader.py. Needed when
- # expatreader is trying to guess a prefix.
- parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
- except AttributeError:
- pass # Not present in Jython (at least)
- parser.setFeature(handler.feature_namespaces, 1)
- rdfxml = RDFXMLHandler(store)
- rdfxml.setDocumentLocator(target)
- #rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
- parser.setContentHandler(rdfxml)
- parser.setErrorHandler(ErrorHandler())
- return parser
-
-
-class RDFXMLParser(Parser):
-
- def __init__(self):
- pass
-
- def parse(self, source, sink, **args):
- self._parser = create_parser(source, sink)
- content_handler = self._parser.getContentHandler()
- preserve_bnode_ids = args.get("preserve_bnode_ids", None)
- if preserve_bnode_ids is not None:
- content_handler.preserve_bnode_ids = preserve_bnode_ids
- # We're only using it once now
- #content_handler.reset()
- #self._parser.reset()
- self._parser.parse(source)
-
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py b/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py
deleted file mode 100644
index 0c2e708..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/trix.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# Copyright (c) 2002, Daniel Krech, http://eikeon.com/
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-#
-# * Neither the name of Daniel Krech nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""
-"""
-from rdflib.namespace import Namespace
-from rdflib.term import URIRef
-from rdflib.term import BNode
-from rdflib.term import Literal
-from rdflib.graph import Graph, ConjunctiveGraph
-from rdflib.exceptions import ParserError
-from rdflib.parser import Parser
-
-from xml.sax.saxutils import handler
-from xml.sax import make_parser
-from xml.sax.handler import ErrorHandler
-
-__all__ = ['create_parser', 'TriXHandler', 'TriXParser']
-
-
-TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
-XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
-
-class TriXHandler(handler.ContentHandler):
- """An Sax Handler for TriX. See http://sw.nokia.com/trix/"""
-
- def __init__(self, store):
- self.store = store
- self.preserve_bnode_ids = False
- self.reset()
-
- def reset(self):
- self.bnode = {}
- self.graph=self.store
- self.triple=None
- self.state=0
- self.lang=None
- self.datatype=None
-
- # ContentHandler methods
-
- def setDocumentLocator(self, locator):
- self.locator = locator
-
- def startDocument(self):
- pass
-
- def startPrefixMapping(self, prefix, namespace):
- pass
-
- def endPrefixMapping(self, prefix):
- pass
-
- def startElementNS(self, name, qname, attrs):
-
- if name[0]!=str(TRIXNS):
- self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS))
-
- if name[1]=="TriX":
- if self.state==0:
- self.state=1
- else:
- self.error("Unexpected TriX element")
-
- elif name[1]=="graph":
- if self.state==1:
- self.state=2
- else:
- self.error("Unexpected graph element")
-
- elif name[1]=="uri":
- if self.state==2:
- # the context uri
- self.state=3
- elif self.state==4:
- # part of a triple
- pass
- else:
- self.error("Unexpected uri element")
-
- elif name[1]=="triple":
- if self.state==2:
- if self.graph==None:
- # anonymous graph, create one with random bnode id
- self.graph=Graph(store=self.store.store)
- # start of a triple
- self.triple=[]
- self.state=4
- else:
- self.error("Unexpected triple element")
-
- elif name[1]=="typedLiteral":
- if self.state==4:
- # part of triple
- self.lang=None
- self.datatype=None
-
- try:
- self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
- except:
- # language not required - ignore
- pass
- try:
- self.datatype=attrs.getValueByQName(u"datatype")
- except KeyError:
- self.error("No required attribute 'datatype'")
- else:
- self.error("Unexpected typedLiteral element")
-
- elif name[1]=="plainLiteral":
- if self.state==4:
- # part of triple
- self.lang=None
- self.datatype=None
- try:
- self.lang=attrs.getValue((unicode(XMLNS), u"lang"))
- except:
- # language not required - ignore
- pass
-
- else:
- self.error("Unexpected plainLiteral element")
-
- elif name[1]=="id":
- if self.state==2:
- # the context uri
- self.state=3
-
- elif self.state==4:
- # part of triple
- pass
- else:
- self.error("Unexpected id element")
-
- else:
- self.error("Unknown element %s in TriX namespace"%name[1])
-
- self.chars=""
-
-
- def endElementNS(self, name, qname):
- if name[0]!=str(TRIXNS):
- self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS))
-
- if name[1]=="uri":
- if self.state==3:
- self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip()))
- self.state=2
- elif self.state==4:
- self.triple+=[URIRef(self.chars.strip())]
- else:
- self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
-
- elif name[1]=="id":
- if self.state==3:
- self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip()))
- self.state=2
- elif self.state==4:
- self.triple+=[self.get_bnode(self.chars.strip())]
- else:
- self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness")
-
- elif name[1]=="plainLiteral" or name[1]=="typedLiteral":
- if self.state==4:
- self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)]
- else:
- self.error("This should never happen if the SAX parser ensures XML syntax correctness")
-
- elif name[1]=="triple":
- if self.state==4:
- if len(self.triple)!=3:
- self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple))
-
- self.graph.add(self.triple)
- #self.store.store.add(self.triple,context=self.graph)
- #self.store.addN([self.triple+[self.graph]])
- self.state=2
- else:
- self.error("This should never happen if the SAX parser ensures XML syntax correctness")
-
- elif name[1]=="graph":
- self.graph=None
- self.state=1
-
- elif name[1]=="TriX":
- self.state=0
-
- else:
- self.error("Unexpected close element")
-
-
- def get_bnode(self,label):
- if self.preserve_bnode_ids:
- bn=BNode(label)
- else:
- if label in self.bnode:
- bn=self.bnode[label]
- else:
- bn=BNode(label)
- self.bnode[label]=bn
- return bn
-
-
- def characters(self, content):
- self.chars+=content
-
-
- def ignorableWhitespace(self, content):
- pass
-
- def processingInstruction(self, target, data):
- pass
-
-
- def error(self, message):
- locator = self.locator
- info = "%s:%s:%s: " % (locator.getSystemId(),
- locator.getLineNumber(), locator.getColumnNumber())
- raise ParserError(info + message)
-
-
-def create_parser(store):
- parser = make_parser()
- try:
- # Workaround for bug in expatreader.py. Needed when
- # expatreader is trying to guess a prefix.
- parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace")
- except AttributeError:
- pass # Not present in Jython (at least)
- parser.setFeature(handler.feature_namespaces, 1)
- trix = TriXHandler(store)
- parser.setContentHandler(trix)
- parser.setErrorHandler(ErrorHandler())
- return parser
-
-
-class TriXParser(Parser):
- """A parser for TriX. See http://sw.nokia.com/trix/"""
-
- def __init__(self):
- pass
-
- def parse(self, source, sink, **args):
- assert sink.store.context_aware
- g=ConjunctiveGraph(store=sink.store)
-
- self._parser = create_parser(g)
- content_handler = self._parser.getContentHandler()
- preserve_bnode_ids = args.get("preserve_bnode_ids", None)
- if preserve_bnode_ids is not None:
- content_handler.preserve_bnode_ids = preserve_bnode_ids
- # We're only using it once now
- #content_handler.reset()
- #self._parser.reset()
- self._parser.parse(source)
-
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py b/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/__init__.py
+++ /dev/null
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py b/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py
deleted file mode 100644
index 63faf9d..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/n3.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""
-Notation 3 (N3) RDF graph serializer for RDFLib.
-"""
-from rdflib.graph import Graph
-from rdflib.namespace import Namespace, OWL
-from rdflib.plugins.serializers.turtle import (TurtleSerializer,
- SUBJECT, VERB, OBJECT)
-
-__all__ = ['N3Serializer']
-
-SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")
-
-
-class N3Serializer(TurtleSerializer):
-
- short_name = "n3"
-
- def __init__(self, store, parent=None):
- super(N3Serializer, self).__init__(store)
- self.keywords.update({
- OWL.sameAs: '=',
- SWAP_LOG.implies: '=>'
- })
- self.parent = parent
-
- def reset(self):
- super(N3Serializer, self).reset()
- self._stores = {}
-
- def subjectDone(self, subject):
- super(N3Serializer, self).subjectDone(subject)
- if self.parent:
- self.parent.subjectDone(subject)
-
- def isDone(self, subject):
- return (super(N3Serializer, self).isDone(subject)
- and (not self.parent or self.parent.isDone(subject)))
-
- def startDocument(self):
- super(N3Serializer, self).startDocument()
- #if not isinstance(self.store, N3Store):
- # return
- #
- #all_list = [self.label(var) for var in
- # self.store.get_universals(recurse=False)]
- #all_list.sort()
- #some_list = [self.label(var) for var in
- # self.store.get_existentials(recurse=False)]
- #some_list.sort()
- #
- #for var in all_list:
- # self.write('\n'+self.indent()+'@forAll %s. '%var)
- #for var in some_list:
- # self.write('\n'+self.indent()+'@forSome %s. '%var)
- #
- #if (len(all_list) + len(some_list)) > 0:
- # self.write('\n')
-
- def endDocument(self):
- if not self.parent:
- super(N3Serializer, self).endDocument()
-
- def indent(self, modifier=0):
- indent = super(N3Serializer, self).indent(modifier)
- if self.parent is not None:
- indent += self.parent.indent()#modifier)
- return indent
-
- def preprocessTriple(self, triple):
- super(N3Serializer, self).preprocessTriple(triple)
- if isinstance(triple[0], Graph):
- for t in triple[0]:
- self.preprocessTriple(t)
- if isinstance(triple[2], Graph):
- for t in triple[2]:
- self.preprocessTriple(t)
-
- def getQName(self, uri, gen_prefix=True):
- qname = None
- if self.parent is not None:
- qname = self.parent.getQName(uri, gen_prefix)
- if qname is None:
- qname = super(N3Serializer, self).getQName(uri, gen_prefix)
- return qname
-
- def statement(self, subject):
- self.subjectDone(subject)
- properties = self.buildPredicateHash(subject)
- if len(properties) == 0:
- return False
- return (self.s_clause(subject)
- or super(N3Serializer, self).statement(subject))
-
- def path(self, node, position, newline=False):
- if not self.p_clause(node, position):
- super(N3Serializer, self).path(node, position, newline)
-
- def s_clause(self, subject):
- if isinstance(subject, Graph):
- self.write('\n'+self.indent())
- self.p_clause(subject, SUBJECT)
- self.predicateList(subject)
- self.write(' .')
- return True
- else:
- return False
-
- def p_clause(self, node, position):
- if isinstance(node, Graph):
- self.subjectDone(node)
- if position is OBJECT:
- self.write(' ')
- self.write('{')
- self.depth += 1
- serializer = N3Serializer(node, parent=self)
- serializer.serialize(self.stream)
- self.depth -= 1
- self.write(self.indent()+'}')
- return True
- else:
- return False
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py b/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py
deleted file mode 100644
index 29e0dff..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/nquads.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import warnings
-
-from rdflib.serializer import Serializer
-from rdflib.py3compat import b
-
-from rdflib.plugins.serializers.nt import _xmlcharref_encode
-
-__all__ = ['NQuadsSerializer']
-
-class NQuadsSerializer(Serializer):
-
- def __init__(self, store):
- if not store.context_aware:
- raise Exception("NQuads serialization only makes sense for context-aware stores!")
-
- super(NQuadsSerializer, self).__init__(store)
-
- def serialize(self, stream, base=None, encoding=None, **args):
- if base is not None:
- warnings.warn("NQuadsSerializer does not support base.")
- if encoding is not None:
- warnings.warn("NQuadsSerializer does not use custom encoding.")
- encoding = self.encoding
- for context in self.store.contexts():
- for triple in context:
- stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace"))
- stream.write(b("\n"))
-
-def _nq_row(triple,context):
- return u"%s %s %s %s .\n" % (triple[0].n3(),
- triple[1].n3(),
- _xmlcharref_encode(triple[2].n3()),
- context.n3())
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py b/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py
deleted file mode 100644
index bbbe720..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/nt.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""
-N-Triples RDF graph serializer for RDFLib.
-See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
-format.
-"""
-from rdflib.serializer import Serializer
-from rdflib.py3compat import b
-import warnings
-
-__all__ = ['NTSerializer']
-
-class NTSerializer(Serializer):
- """
- Serializes RDF graphs to NTriples format.
- """
-
- def serialize(self, stream, base=None, encoding=None, **args):
- if base is not None:
- warnings.warn("NTSerializer does not support base.")
- if encoding is not None:
- warnings.warn("NTSerializer does not use custom encoding.")
- encoding = self.encoding
- for triple in self.store:
- stream.write(_nt_row(triple).encode(encoding, "replace"))
- stream.write(b("\n"))
-
-
-def _nt_row(triple):
- return u"%s %s %s .\n" % (triple[0].n3(),
- triple[1].n3(),
- _xmlcharref_encode(triple[2].n3()))
-
-# from <http://code.activestate.com/recipes/303668/>
-def _xmlcharref_encode(unicode_data, encoding="ascii"):
- """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
- chars = []
-
- # nothing to do about xmlchars, but replace newlines with escapes:
- unicode_data=unicode_data.replace("\n","\\n")
- if unicode_data.startswith('"""'):
- # Updated with Bernhard Schandl's patch...
- # unicode_data = unicode_data.replace('"""', '"') # original
-
- last_triplequote_pos = unicode_data.rfind('"""')
- payload = unicode_data[3:last_triplequote_pos]
- trail = unicode_data[last_triplequote_pos+3:]
-
- # fix three-quotes encoding
- payload = payload.replace('\\"""', '"""')
-
- # corner case: if string ends with " it is already encoded.
- # so we need to de-escape it before it will be re-escaped in the next step.
- if payload.endswith('\\"'):
- payload = payload.replace('\\"', '"')
-
- # escape quotes in payload
- payload = payload.replace('"', '\\"')
-
- # reconstruct result using single quotes
- unicode_data = '"%s"%s' % (payload, trail)
-
- # Step through the unicode_data string one character at a time in
- # order to catch unencodable characters:
- for char in unicode_data:
- try:
- char.encode(encoding, 'strict')
- except UnicodeError:
- if ord(char) <= 0xFFFF:
- chars.append('\\u%04X' % ord(char))
- else:
- chars.append('\\U%08X' % ord(char))
- else:
- chars.append(char)
-
- return ''.join(chars)
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py b/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py
deleted file mode 100644
index d72c27e..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/rdfxml.py
+++ /dev/null
@@ -1,282 +0,0 @@
-from __future__ import generators
-
-from rdflib.plugins.serializers.xmlwriter import XMLWriter
-
-from rdflib.namespace import Namespace, RDF, RDFS, split_uri
-
-from rdflib.term import URIRef, Literal, BNode
-from rdflib.util import first, more_than
-from rdflib.collection import Collection
-from rdflib.serializer import Serializer
-
-from rdflib.exceptions import Error
-
-from rdflib.py3compat import b
-
-from xml.sax.saxutils import quoteattr, escape
-
-__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']
-
-class XMLSerializer(Serializer):
-
- def __init__(self, store):
- super(XMLSerializer, self).__init__(store)
-
- def __bindings(self):
- store = self.store
- nm = store.namespace_manager
- bindings = {}
- for predicate in set(store.predicates()):
- prefix, namespace, name = nm.compute_qname(predicate)
- bindings[prefix] = URIRef(namespace)
- RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
- if "rdf" in bindings:
- assert bindings["rdf"]==RDFNS
- else:
- bindings["rdf"] = RDFNS
- for prefix, namespace in bindings.iteritems():
- yield prefix, namespace
-
-
- def serialize(self, stream, base=None, encoding=None, **args):
- self.base = base
- self.__stream = stream
- self.__serialized = {}
- encoding = self.encoding
- self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace'))
-
- # startDocument
- write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)
-
- # startRDF
- write('<rdf:RDF\n')
- # If provided, write xml:base attribute for the RDF
- if "xml_base" in args:
- write(' xml:base="%s"\n' % args['xml_base'])
- # TODO: assert(namespaces["http://www.w3.org/1999/02/22-rdf-syntax-ns#"]=='rdf')
- bindings = list(self.__bindings())
- bindings.sort()
- for prefix, namespace in bindings:
- if prefix:
- write(' xmlns:%s="%s"\n' % (prefix, namespace))
- else:
- write(' xmlns="%s"\n' % namespace)
- write('>\n')
-
- # write out triples by subject
- for subject in self.store.subjects():
- self.subject(subject, 1)
-
- # endRDF
- write( "</rdf:RDF>\n" )
-
- # Set to None so that the memory can get garbage collected.
- #self.__serialized = None
- del self.__serialized
-
-
- def subject(self, subject, depth=1):
- if not subject in self.__serialized:
- self.__serialized[subject] = 1
- if isinstance(subject, (BNode,URIRef)):
- write = self.write
- indent = " " * depth
- element_name = "rdf:Description"
- if isinstance(subject, BNode):
- write( '%s<%s rdf:nodeID="%s"' %
- (indent, element_name, subject))
- else:
- uri = quoteattr(self.relativize(subject))
- write( "%s<%s rdf:about=%s" % (indent, element_name, uri))
- if (subject, None, None) in self.store:
- write( ">\n" )
- for predicate, object in self.store.predicate_objects(subject):
- self.predicate(predicate, object, depth+1)
- write( "%s</%s>\n" % (indent, element_name))
- else:
- write( "/>\n" )
-
- def predicate(self, predicate, object, depth=1):
- write = self.write
- indent = " " * depth
- qname = self.store.namespace_manager.qname(predicate)
- if isinstance(object, Literal):
- attributes = ""
- if object.language:
- attributes += ' xml:lang="%s"'%object.language
-
- if object.datatype:
- attributes += ' rdf:datatype="%s"'%object.datatype
-
- write("%s<%s%s>%s</%s>\n" %
- (indent, qname, attributes,
- escape(object), qname) )
- else:
- if isinstance(object, BNode):
- write('%s<%s rdf:nodeID="%s"/>\n' %
- (indent, qname, object))
- else:
- write("%s<%s rdf:resource=%s/>\n" %
- (indent, qname, quoteattr(self.relativize(object))))
-
-
-
-XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
-XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
-OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')
-
-# TODO:
-def fix(val):
- "strip off _: from nodeIDs... as they are not valid NCNames"
- if val.startswith("_:"):
- return val[2:]
- else:
- return val
-
-
-class PrettyXMLSerializer(Serializer):
-
- def __init__(self, store, max_depth=3):
- super(PrettyXMLSerializer, self).__init__(store)
- self.forceRDFAbout=set()
-
- def serialize(self, stream, base=None, encoding=None, **args):
- self.__serialized = {}
- store = self.store
- self.base = base
- self.max_depth = args.get("max_depth", 3)
- assert self.max_depth>0, "max_depth must be greater than 0"
-
- self.nm = nm = store.namespace_manager
- self.writer = writer = XMLWriter(stream, nm, encoding)
-
- namespaces = {}
- possible = set(store.predicates()).union(store.objects(None, RDF.type))
- for predicate in possible:
- prefix, namespace, local = nm.compute_qname(predicate)
- namespaces[prefix] = namespace
- namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- writer.push(RDF.RDF)
- if "xml_base" in args:
- writer.attribute(XMLBASE, args["xml_base"])
- writer.namespaces(namespaces.iteritems())
-
- # Write out subjects that can not be inline
- for subject in store.subjects():
- if (None, None, subject) in store:
- if (subject, None, subject) in store:
- self.subject(subject, 1)
- else:
- self.subject(subject, 1)
-
- # write out anything that has not yet been reached
- # write out BNodes last (to ensure they can be inlined where possible)
- bnodes=set()
- for subject in store.subjects():
- if isinstance(subject,BNode):
- bnodes.add(subject)
- continue
- self.subject(subject, 1)
- #now serialize only those BNodes that have not been serialized yet
- for bnode in bnodes:
- if bnode not in self.__serialized:
- self.subject(subject, 1)
- writer.pop(RDF.RDF)
- stream.write(b("\n"))
-
- # Set to None so that the memory can get garbage collected.
- self.__serialized = None
-
-
-
- def subject(self, subject, depth=1):
- store = self.store
- writer = self.writer
- if subject in self.forceRDFAbout:
- writer.push(RDF.Description)
- writer.attribute(RDF.about, self.relativize(subject))
- writer.pop(RDF.Description)
- self.forceRDFAbout.remove(subject)
- elif not subject in self.__serialized:
- self.__serialized[subject] = 1
- type = first(store.objects(subject, RDF.type))
- try:
- self.nm.qname(type)
- except:
- type = None
- element = type or RDF.Description
- writer.push(element)
- if isinstance(subject, BNode):
- def subj_as_obj_more_than(ceil):
- return True
- # more_than(store.triples((None, None, subject)), ceil)
-
- #here we only include BNode labels if they are referenced
- #more than once (this reduces the use of redundant BNode identifiers)
- if subj_as_obj_more_than(1):
- writer.attribute(RDF.nodeID, fix(subject))
- else:
- writer.attribute(RDF.about, self.relativize(subject))
- if (subject, None, None) in store:
- for predicate, object in store.predicate_objects(subject):
- if not (predicate==RDF.type and object==type):
- self.predicate(predicate, object, depth+1)
- writer.pop(element)
- elif subject in self.forceRDFAbout:
- writer.push(RDF.Description)
- writer.attribute(RDF.about, self.relativize(subject))
- writer.pop(RDF.Description)
- self.forceRDFAbout.remove(subject)
-
- def predicate(self, predicate, object, depth=1):
- writer = self.writer
- store = self.store
- writer.push(predicate)
- if isinstance(object, Literal):
- attributes = ""
- if object.language:
- writer.attribute(XMLLANG, object.language)
- if object.datatype:
- writer.attribute(RDF.datatype, object.datatype)
- writer.text(object)
- elif object in self.__serialized or not (object, None, None) in store:
- if isinstance(object, BNode):
- if more_than(store.triples((None, None, object)), 0):
- writer.attribute(RDF.nodeID, fix(object))
- else:
- writer.attribute(RDF.resource, self.relativize(object))
- else:
- if first(store.objects(object, RDF.first)): # may not have type RDF.List
- collection = object
- self.__serialized[object] = 1
- # TODO: warn that any assertions on object other than
- # RDF.first and RDF.rest are ignored... including RDF.List
- writer.attribute(RDF.parseType, "Collection")
- col=Collection(store,object)
- for item in col:
- if isinstance(item,URIRef):
- self.forceRDFAbout.add(item)
- self.subject(item)
- if not isinstance(item,URIRef):
- self.__serialized[item] = 1
- else:
- if first(store.triples_choices((object,
- RDF.type,
- [OWL_NS.Class,RDFS.Class]))) and\
- isinstance(object, URIRef):
- writer.attribute(RDF.resource, self.relativize(object))
- elif depth<=self.max_depth:
- self.subject(object, depth+1)
- elif isinstance(object, BNode):
- if not object in self.__serialized and \
- (object, None, None) in store and \
- len(list(store.subjects(object=object)))==1:
- #inline blank nodes if they haven't been serialized yet and are
- #only referenced once (regardless of depth)
- self.subject(object, depth+1)
- else:
- writer.attribute(RDF.nodeID, fix(object))
- else:
- writer.attribute(RDF.resource, self.relativize(object))
- writer.pop(predicate)
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py b/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py
deleted file mode 100644
index c7115c0..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/trix.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from rdflib.serializer import Serializer
-from rdflib.plugins.serializers.xmlwriter import XMLWriter
-
-from rdflib.term import URIRef, Literal, BNode
-from rdflib.namespace import Namespace
-
-from rdflib.graph import Graph, ConjunctiveGraph
-
-from rdflib.py3compat import b
-
-__all__ = ['TriXSerializer']
-
-## TODO: MOve this somewhere central
-TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/")
-XMLNS=Namespace("http://www.w3.org/XML/1998/namespace")
-
-class TriXSerializer(Serializer):
- def __init__(self, store):
- super(TriXSerializer, self).__init__(store)
-
- def serialize(self, stream, base=None, encoding=None, **args):
-
- nm=self.store.namespace_manager
-
- self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})
-
- self.writer.push(TRIXNS[u"TriX"])
- self.writer.namespaces()
-
- if isinstance(self.store, ConjunctiveGraph):
- for subgraph in self.store.contexts():
- self._writeGraph(subgraph)
- elif isinstance(self.store, Graph):
- self._writeGraph(self.store)
- else:
- raise Exception("Unknown graph type: "+type(self.store))
-
- self.writer.pop()
- stream.write(b("\n"))
-
-
- def _writeGraph(self, graph):
- self.writer.push(TRIXNS[u"graph"])
- if isinstance(graph.identifier, URIRef):
- self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier))
-
- for triple in graph.triples((None,None,None)):
- self._writeTriple(triple)
- self.writer.pop()
-
- def _writeTriple(self, triple):
- self.writer.push(TRIXNS[u"triple"])
- for component in triple:
- if isinstance(component, URIRef):
- self.writer.element(TRIXNS[u"uri"],
- content=unicode(component))
- elif isinstance(component, BNode):
- self.writer.element(TRIXNS[u"id"],
- content=unicode(component))
- elif isinstance(component, Literal):
- if component.datatype:
- self.writer.element(TRIXNS[u"typedLiteral"],
- content=unicode(component),
- attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) })
- elif component.language:
- self.writer.element(TRIXNS[u"plainLiteral"],
- content=unicode(component),
- attributes={ XMLNS[u"lang"]: unicode(component.language) })
- else:
- self.writer.element(TRIXNS[u"plainLiteral"],
- content=unicode(component))
- self.writer.pop()
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py b/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py
deleted file mode 100644
index 6878013..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/turtle.py
+++ /dev/null
@@ -1,364 +0,0 @@
-"""
-Turtle RDF graph serializer for RDFLib.
-See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification.
-"""
-from rdflib.term import BNode, Literal, URIRef
-
-from rdflib.exceptions import Error
-
-from rdflib.serializer import Serializer
-
-from rdflib.namespace import RDF, RDFS
-
-__all__ = ['RecursiveSerializer', 'TurtleSerializer']
-
-class RecursiveSerializer(Serializer):
-
- topClasses = [RDFS.Class]
- predicateOrder = [RDF.type, RDFS.label]
- maxDepth = 10
- indentString = u" "
-
- def __init__(self, store):
-
- super(RecursiveSerializer, self).__init__(store)
- self.stream = None
- self.reset()
-
- def addNamespace(self, prefix, uri):
- self.namespaces[prefix] = uri
-
- def checkSubject(self, subject):
- """Check to see if the subject should be serialized yet"""
- if ((self.isDone(subject))
- or (subject not in self._subjects)
- or ((subject in self._topLevels) and (self.depth > 1))
- or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth))
- ):
- return False
- return True
-
- def isDone(self, subject):
- """Return true if subject is serialized"""
- return subject in self._serialized
-
- def orderSubjects(self):
- seen = {}
- subjects = []
-
- for classURI in self.topClasses:
- members = list(self.store.subjects(RDF.type, classURI))
- members.sort()
-
- for member in members:
- subjects.append(member)
- self._topLevels[member] = True
- seen[member] = True
-
- recursable = [(isinstance(subject,BNode), self.refCount(subject), subject) for subject in self._subjects
- if subject not in seen]
-
- recursable.sort()
- subjects.extend([subject for (isbnode, refs, subject) in recursable])
-
- return subjects
-
- def preprocess(self):
- for triple in self.store.triples((None,None,None)):
- self.preprocessTriple(triple)
-
- def preprocessTriple(self, (s,p,o)):
- references = self.refCount(o) + 1
- self._references[o] = references
- self._subjects[s] = True
-
- def refCount(self, node):
- """Return the number of times this node has been referenced in the object position"""
- return self._references.get(node, 0)
-
- def reset(self):
- self.depth = 0
- self.lists = {}
- self.namespaces = {}
- self._references = {}
- self._serialized = {}
- self._subjects = {}
- self._topLevels = {}
-
- def buildPredicateHash(self, subject):
- """Build a hash key by predicate to a list of objects for the given subject"""
- properties = {}
- for s,p,o in self.store.triples((subject, None, None)):
- oList = properties.get(p, [])
- oList.append(o)
- properties[p] = oList
- return properties
-
- def sortProperties(self, properties):
- """Take a hash from predicate uris to lists of values.
- Sort the lists of values. Return a sorted list of properties."""
- # Sort object lists
- for prop, objects in properties.items():
- objects.sort()
-
- # Make sorted list of properties
- propList = []
- seen = {}
- for prop in self.predicateOrder:
- if (prop in properties) and (prop not in seen):
- propList.append(prop)
- seen[prop] = True
- props = properties.keys()
- props.sort()
- for prop in props:
- if prop not in seen:
- propList.append(prop)
- seen[prop] = True
- return propList
-
- def subjectDone(self, subject):
- """Mark a subject as done."""
- self._serialized[subject] = True
-
- def indent(self, modifier=0):
- """Returns indent string multiplied by the depth"""
- return (self.depth+modifier)*self.indentString
-
- def write(self, text):
- """Write text in given encoding."""
- self.stream.write(text.encode(self.encoding, 'replace'))
-
-
-SUBJECT = 0
-VERB = 1
-OBJECT = 2
-
-_GEN_QNAME_FOR_DT = False
-_SPACIOUS_OUTPUT = False
-
-
-class TurtleSerializer(RecursiveSerializer):
-
- short_name = "turtle"
- indentString = ' '
-
- def __init__(self, store):
- super(TurtleSerializer, self).__init__(store)
- self.keywords = {
- RDF.type: 'a'
- }
- self.reset()
- self.stream = None
- self._spacious = _SPACIOUS_OUTPUT
-
- def reset(self):
- super(TurtleSerializer, self).reset()
- self._shortNames = {}
- self._started = False
-
- def serialize(self, stream, base=None, encoding=None, spacious=None, **args):
- self.reset()
- self.stream = stream
- self.base = base
-
- if spacious is not None:
- self._spacious = spacious
- # In newer rdflibs these are always in the namespace manager
- #self.store.prefix_mapping('rdf', RDFNS)
- #self.store.prefix_mapping('rdfs', RDFSNS)
-
- self.preprocess()
- subjects_list = self.orderSubjects()
-
- self.startDocument()
-
- firstTime = True
- for subject in subjects_list:
- if self.isDone(subject):
- continue
- if firstTime:
- firstTime = False
- if self.statement(subject) and not firstTime:
- self.write('\n')
-
- self.endDocument()
- stream.write(u"\n".encode('ascii'))
-
- def preprocessTriple(self, triple):
- super(TurtleSerializer, self).preprocessTriple(triple)
- for i, node in enumerate(triple):
- if node in self.keywords:
- continue
- # Don't use generated prefixes for subjects and objects
- self.getQName(node, gen_prefix=(i==VERB))
- if isinstance(node, Literal) and node.datatype:
- self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
- p = triple[1]
- if isinstance(p, BNode):
- self._references[p] = self.refCount(p) + 1
-
- def getQName(self, uri, gen_prefix=True):
- if not isinstance(uri, URIRef):
- return None
-
- parts=None
-
- try:
- parts = self.store.compute_qname(uri, generate=gen_prefix)
- except:
-
- # is the uri a namespace in itself?
- pfx = self.store.store.prefix(uri)
-
- if pfx is not None:
- parts = (pfx, uri, '')
- else:
- # nothing worked
- return None
-
- prefix, namespace, local = parts
- # Local parts with '.' will mess up serialization
- if '.' in local:
- return None
- self.addNamespace(prefix, namespace)
- return u'%s:%s' % (prefix, local)
-
- def startDocument(self):
- self._started = True
- ns_list = sorted(self.namespaces.items())
- for prefix, uri in ns_list:
- self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri))
- if ns_list and self._spacious:
- self.write('\n')
-
- def endDocument(self):
- if self._spacious:
- self.write('\n')
-
- def statement(self, subject):
- self.subjectDone(subject)
- return self.s_squared(subject) or self.s_default(subject)
-
- def s_default(self, subject):
- self.write('\n'+self.indent())
- self.path(subject, SUBJECT)
- self.predicateList(subject)
- self.write(' .')
- return True
-
- def s_squared(self, subject):
- if (self.refCount(subject) > 0) or not isinstance(subject, BNode):
- return False
- self.write('\n'+self.indent()+'[]')
- #self.depth+=1
- self.predicateList(subject)
- #self.depth-=1
- self.write(' .')
- return True
-
- def path(self, node, position, newline=False):
- if not (self.p_squared(node, position, newline)
- or self.p_default(node, position, newline)):
- raise Error("Cannot serialize node '%s'"%(node, ))
-
- def p_default(self, node, position, newline=False):
- if position != SUBJECT and not newline:
- self.write(' ')
- self.write(self.label(node, position))
- return True
-
- def label(self, node, position):
- if node == RDF.nil:
- return '()'
- if position is VERB and node in self.keywords:
- return self.keywords[node]
- if isinstance(node, Literal):
- return node._literal_n3(use_plain=True,
- qname_callback=lambda dt:
- self.getQName(dt, _GEN_QNAME_FOR_DT))
- else:
- return self.getQName(node, position==VERB) or node.n3()
-
- def p_squared(self, node, position, newline=False):
- if (not isinstance(node, BNode)
- or node in self._serialized
- or self.refCount(node) > 1
- or position == SUBJECT):
- return False
-
- if not newline:
- self.write(' ')
-
- if self.isValidList(node):
- # this is a list
- self.write('(')
- self.depth += 1#2
- self.doList(node)
- self.depth -= 1#2
- self.write(' )')
- else:
- self.subjectDone(node)
- self.depth += 2
- #self.write('[\n' + self.indent())
- self.write('[')
- self.depth -= 1
- #self.predicateList(node, newline=True)
- self.predicateList(node, newline=False)
- #self.write('\n' + self.indent() + ']')
- self.write(' ]')
- self.depth -= 1
-
- return True
-
- def isValidList(self, l):
- """
- Checks if l is a valid RDF list, i.e. no nodes have other properties.
- """
- try:
- if not self.store.value(l, RDF.first):
- return False
- except:
- return False
- while l:
- if l != RDF.nil and len(
- list(self.store.predicate_objects(l))) != 2:
- return False
- l = self.store.value(l, RDF.rest)
- return True
-
- def doList(self,l):
- while l:
- item = self.store.value(l, RDF.first)
- if item:
- self.path(item, OBJECT)
- self.subjectDone(l)
- l = self.store.value(l, RDF.rest)
-
- def predicateList(self, subject, newline=False):
- properties = self.buildPredicateHash(subject)
- propList = self.sortProperties(properties)
- if len(propList) == 0:
- return
- self.verb(propList[0], newline=newline)
- self.objectList(properties[propList[0]])
- for predicate in propList[1:]:
- self.write(';\n' + self.indent(1))
- self.verb(predicate, newline=True)
- self.objectList(properties[predicate])
-
- def verb(self, node, newline=False):
- self.path(node, VERB, newline)
-
- def objectList(self, objects):
- count = len(objects)
- if count == 0:
- return
- depthmod = (count == 1) and 0 or 1
- self.depth += depthmod
- self.path(objects[0], OBJECT)
- for obj in objects[1:]:
- self.write(',\n' + self.indent(1))
- self.path(obj, OBJECT, newline=True)
- self.depth -= depthmod
-
-
diff --git a/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py b/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py
deleted file mode 100644
index d36af4b..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/serializers/xmlwriter.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import codecs
-from xml.sax.saxutils import quoteattr, escape
-
-__all__ = ['XMLWriter']
-
-class XMLWriter(object):
- def __init__(self, stream, namespace_manager, encoding=None, decl=1, extra_ns={}):
- encoding = encoding or 'utf-8'
- encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding)
- self.stream = stream = stream_writer(stream)
- if decl:
- stream.write('<?xml version="1.0" encoding="%s"?>' % encoding)
- self.element_stack = []
- self.nm = namespace_manager
- self.extra_ns=extra_ns
- self.closed = True
-
- def __get_indent(self):
- return " " * len(self.element_stack)
- indent = property(__get_indent)
-
- def __close_start_tag(self):
- if not self.closed: # TODO:
- self.closed = True
- self.stream.write(">")
-
- def push(self, uri):
- self.__close_start_tag()
- write = self.stream.write
- write("\n")
- write(self.indent)
- write("<%s" % self.qname(uri))
- self.element_stack.append(uri)
- self.closed = False
- self.parent = False
-
- def pop(self, uri=None):
- top = self.element_stack.pop()
- if uri:
- assert uri == top
- write = self.stream.write
- if not self.closed:
- self.closed = True
- write("/>")
- else:
- if self.parent:
- write("\n")
- write(self.indent)
- write("</%s>" % self.qname(top))
- self.parent = True
-
- def element(self, uri, content, attributes={}):
- """Utility method for adding a complete simple element"""
- self.push(uri)
- for k, v in attributes.iteritems():
- self.attribute(k,v)
- self.text(content)
- self.pop()
-
- def namespaces(self, namespaces=None):
- if not namespaces:
- namespaces=self.nm.namespaces()
-
- write = self.stream.write
- write("\n")
- for prefix, namespace in namespaces:
- if prefix:
- write(' xmlns:%s="%s"\n' % (prefix, namespace))
- else:
- write(' xmlns="%s"\n' % namespace)
-
- for prefix, namespace in self.extra_ns.items():
- if prefix:
- write(' xmlns:%s="%s"\n' % (prefix, namespace))
- else:
- write(' xmlns="%s"\n' % namespace)
-
-
- def attribute(self, uri, value):
- write = self.stream.write
- write(" %s=%s" % (self.qname(uri), quoteattr(value)))
-
- def text(self, text):
- self.__close_start_tag()
- if "<" in text and ">" in text and not "]]>" in text:
- self.stream.write("<![CDATA[")
- self.stream.write(text)
- self.stream.write("]]>")
- else:
- self.stream.write(escape(text))
-
- def qname(self,uri):
- """Compute qname for a uri using our extra namespaces,
- or the given namespace manager"""
-
- for pre,ns in self.extra_ns.items():
- if uri.startswith(ns):
- if pre!="":
- return ":".join(pre,uri[len(ns):])
- else:
- return uri[len(ns):]
-
- return self.nm.qname(uri)
diff --git a/creactistore/_templates/lib/rdflib/plugins/sleepycat.py b/creactistore/_templates/lib/rdflib/plugins/sleepycat.py
deleted file mode 100644
index 67fcc17..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/sleepycat.py
+++ /dev/null
@@ -1,531 +0,0 @@
-from rdflib.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN
-from rdflib.term import URIRef
-from rdflib.py3compat import b
-def bb(u): return u.encode('utf-8')
-
-try:
- from bsddb import db
- has_bsddb = True
-except ImportError:
- try:
- from bsddb3 import db
- has_bsddb = True
- except ImportError:
- has_bsddb = False
-from os import mkdir
-from os.path import exists, abspath
-from urllib import pathname2url
-from threading import Thread
-
-import logging
-_logger = logging.getLogger(__name__)
-
-__all__ = ['Sleepycat']
-
-class Sleepycat(Store):
- context_aware = True
- formula_aware = True
- transaction_aware = False
- db_env = None
-
- def __init__(self, configuration=None, identifier=None):
- if not has_bsddb: raise Exception("Unable to import bsddb/bsddb3, store is unusable.")
- self.__open = False
- self.__identifier = identifier
- super(Sleepycat, self).__init__(configuration)
- self._loads = self.node_pickler.loads
- self._dumps = self.node_pickler.dumps
-
- def __get_identifier(self):
- return self.__identifier
- identifier = property(__get_identifier)
-
- def _init_db_environment(self, homeDir, create=True):
- envsetflags = db.DB_CDB_ALLDB
- envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD
- if not exists(homeDir):
- if create==True:
- mkdir(homeDir) # TODO: implement create method and refactor this to it
- self.create(homeDir)
- else:
- return NO_STORE
- db_env = db.DBEnv()
- db_env.set_cachesize(0, 1024*1024*50) # TODO
- #db_env.set_lg_max(1024*1024)
- db_env.set_flags(envsetflags, 1)
- db_env.open(homeDir, envflags | db.DB_CREATE)
- return db_env
-
- def is_open(self):
- return self.__open
-
- def open(self, path, create=True):
- if not has_bsddb: return NO_STORE
- homeDir = path
-
- if self.__identifier is None:
- self.__identifier = URIRef(pathname2url(abspath(homeDir)))
-
- db_env = self._init_db_environment(homeDir, create)
- if db_env == NO_STORE:
- return NO_STORE
- self.db_env = db_env
- self.__open = True
-
- dbname = None
- dbtype = db.DB_BTREE
- # auto-commit ensures that the open-call commits when transactions are enabled
- dbopenflags = db.DB_THREAD
- if self.transaction_aware == True:
- dbopenflags |= db.DB_AUTO_COMMIT
-
- dbmode = 0660
- dbsetflags = 0
-
- # create and open the DBs
- self.__indicies = [None,] * 3
- self.__indicies_info = [None,] * 3
- for i in xrange(0, 3):
- index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode()
- index = db.DB(db_env)
- index.set_flags(dbsetflags)
- index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
- self.__indicies[i] = index
- self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i))
-
- lookup = {}
- for i in xrange(0, 8):
- results = []
- for start in xrange(0, 3):
- score = 1
- len = 0
- for j in xrange(start, start+3):
- if i & (1<<(j%3)):
- score = score << 1
- len += 1
- else:
- break
- tie_break = 2-start
- results.append(((score, tie_break), start, len))
-
- results.sort()
- score, start, len = results[-1]
-
- def get_prefix_func(start, end):
- def get_prefix(triple, context):
- if context is None:
- yield ""
- else:
- yield context
- i = start
- while i<end:
- yield triple[i%3]
- i += 1
- yield ""
- return get_prefix
-
- lookup[i] = (self.__indicies[start], get_prefix_func(start, start + len), from_key_func(start), results_from_key_func(start, self._from_string))
-
-
- self.__lookup_dict = lookup
-
- self.__contexts = db.DB(db_env)
- self.__contexts.set_flags(dbsetflags)
- self.__contexts.open("contexts", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
-
- self.__namespace = db.DB(db_env)
- self.__namespace.set_flags(dbsetflags)
- self.__namespace.open("namespace", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
-
- self.__prefix = db.DB(db_env)
- self.__prefix.set_flags(dbsetflags)
- self.__prefix.open("prefix", dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode)
-
- self.__k2i = db.DB(db_env)
- self.__k2i.set_flags(dbsetflags)
- self.__k2i.open("k2i", dbname, db.DB_HASH, dbopenflags|db.DB_CREATE, dbmode)
-
- self.__i2k = db.DB(db_env)
- self.__i2k.set_flags(dbsetflags)
- self.__i2k.open("i2k", dbname, db.DB_RECNO, dbopenflags|db.DB_CREATE, dbmode)
-
- self.__needs_sync = False
- t = Thread(target=self.__sync_run)
- t.setDaemon(True)
- t.start()
- self.__sync_thread = t
- return VALID_STORE
-
-
- def __sync_run(self):
- from time import sleep, time
- try:
- min_seconds, max_seconds = 10, 300
- while self.__open:
- if self.__needs_sync:
- t0 = t1 = time()
- self.__needs_sync = False
- while self.__open:
- sleep(.1)
- if self.__needs_sync:
- t1 = time()
- self.__needs_sync = False
- if time()-t1 > min_seconds or time()-t0 > max_seconds:
- self.__needs_sync = False
- _logger.debug("sync")
- self.sync()
- break
- else:
- sleep(1)
- except Exception, e:
- _logger.exception(e)
-
- def sync(self):
- if self.__open:
- for i in self.__indicies:
- i.sync()
- self.__contexts.sync()
- self.__namespace.sync()
- self.__prefix.sync()
- self.__i2k.sync()
- self.__k2i.sync()
-
- def close(self, commit_pending_transaction=False):
- self.__open = False
- self.__sync_thread.join()
- for i in self.__indicies:
- i.close()
- self.__contexts.close()
- self.__namespace.close()
- self.__prefix.close()
- self.__i2k.close()
- self.__k2i.close()
- self.db_env.close()
-
- def add(self, (subject, predicate, object), context, quoted=False, txn=None):
- """\
- Add a triple to the store of triples.
- """
- assert self.__open, "The Store must be open."
- assert context!=self, "Can not add triple directly to store"
- Store.add(self, (subject, predicate, object), context, quoted)
-
- _to_string = self._to_string
-
- s = _to_string(subject, txn=txn)
- p = _to_string(predicate, txn=txn)
- o = _to_string(object, txn=txn)
- c = _to_string(context, txn=txn)
-
- cspo, cpos, cosp = self.__indicies
-
- value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
- if value is None:
- self.__contexts.put(bb(c), "", txn=txn)
-
- contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("")
- contexts = set(contexts_value.split(b("^")))
- contexts.add(bb(c))
- contexts_value = b("^").join(contexts)
- assert contexts_value!=None
-
- cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn)
- cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn)
- cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn)
- if not quoted:
- cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn)
- cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn)
- cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn)
-
- self.__needs_sync = True
-
- def __remove(self, (s, p, o), c, quoted=False, txn=None):
- cspo, cpos, cosp = self.__indicies
- contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("")
- contexts = set(contexts_value.split(b("^")))
- contexts.discard(c)
- contexts_value = b("^").join(contexts)
- for i, _to_key, _from_key in self.__indicies_info:
- i.delete(_to_key((s, p, o), c), txn=txn)
- if not quoted:
- if contexts_value:
- for i, _to_key, _from_key in self.__indicies_info:
- i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn)
- else:
- for i, _to_key, _from_key in self.__indicies_info:
- try:
- i.delete(_to_key((s, p, o), b("")), txn=txn)
- except db.DBNotFoundError, e:
- pass # TODO: is it okay to ignore these?
-
- def remove(self, (subject, predicate, object), context, txn=None):
- assert self.__open, "The Store must be open."
- Store.remove(self, (subject, predicate, object), context)
- _to_string = self._to_string
-
- if context is not None:
- if context == self:
- context = None
-
- if subject is not None and predicate is not None and object is not None and context is not None:
- s = _to_string(subject, txn=txn)
- p = _to_string(predicate, txn=txn)
- o = _to_string(object, txn=txn)
- c = _to_string(context, txn=txn)
- value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn)
- if value is not None:
- self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn)
- self.__needs_sync = True
- else:
- cspo, cpos, cosp = self.__indicies
- index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
-
- cursor = index.cursor(txn=txn)
- try:
- current = cursor.set_range(prefix)
- needs_sync = True
- except db.DBNotFoundError:
- current = None
- needs_sync = False
- cursor.close()
- while current:
- key, value = current
- cursor = index.cursor(txn=txn)
- try:
- cursor.set_range(key)
- # Hack to stop 2to3 converting this to next(cursor)
- current = getattr(cursor, 'next')()
- except db.DBNotFoundError:
- current = None
- cursor.close()
- if key.startswith(prefix):
- c, s, p, o = from_key(key)
- if context is None:
- contexts_value = index.get(key, txn=txn) or b("")
- contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts
- contexts.add(b("")) # and from the conjunctive index
- for c in contexts:
- for i, _to_key, _ in self.__indicies_info:
- i.delete(_to_key((s, p, o), c), txn=txn)
- else:
- self.__remove((s, p, o), c, txn=txn)
- else:
- break
-
- if context is not None:
- if subject is None and predicate is None and object is None:
- # TODO: also if context becomes empty and not just on remove((None, None, None), c)
- try:
- self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn)
- except db.DBNotFoundError, e:
- pass
-
- self.__needs_sync = needs_sync
-
- def triples(self, (subject, predicate, object), context=None, txn=None):
- """A generator over all the triples matching """
- assert self.__open, "The Store must be open."
-
- if context is not None:
- if context == self:
- context = None
-
- _from_string = self._from_string
- index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn)
-
- cursor = index.cursor(txn=txn)
- try:
- current = cursor.set_range(prefix)
- except db.DBNotFoundError:
- current = None
- cursor.close()
- while current:
- key, value = current
- cursor = index.cursor(txn=txn)
- try:
- cursor.set_range(key)
- # Cheap hack so 2to3 doesn't convert to next(cursor)
- current = getattr(cursor, 'next')()
- except db.DBNotFoundError:
- current = None
- cursor.close()
- if key and key.startswith(prefix):
- contexts_value = index.get(key, txn=txn)
- yield results_from_key(key, subject, predicate, object, contexts_value)
- else:
- break
-
- def __len__(self, context=None):
- assert self.__open, "The Store must be open."
- if context is not None:
- if context == self:
- context = None
-
- if context is None:
- prefix = b("^")
- else:
- prefix = bb("%s^" % self._to_string(context))
-
- index = self.__indicies[0]
- cursor = index.cursor()
- current = cursor.set_range(prefix)
- count = 0
- while current:
- key, value = current
- if key.startswith(prefix):
- count +=1
- # Hack to stop 2to3 converting this to next(cursor)
- current = getattr(cursor, 'next')()
- else:
- break
- cursor.close()
- return count
-
- def bind(self, prefix, namespace):
- prefix = prefix.encode("utf-8")
- namespace = namespace.encode("utf-8")
- bound_prefix = self.__prefix.get(namespace)
- if bound_prefix:
- self.__namespace.delete(bound_prefix)
- self.__prefix[namespace] = prefix
- self.__namespace[prefix] = namespace
-
- def namespace(self, prefix):
- prefix = prefix.encode("utf-8")
- ns = self.__namespace.get(prefix, None)
- if ns is not None:
- return ns.decode('utf-8')
- return None
-
- def prefix(self, namespace):
- namespace = namespace.encode("utf-8")
- prefix = self.__prefix.get(namespace, None)
- if prefix is not None:
- return prefix.decode('utf-8')
- return None
-
- def namespaces(self):
- cursor = self.__namespace.cursor()
- results = []
- current = cursor.first()
- while current:
- prefix, namespace = current
- results.append((prefix.decode('utf-8'), namespace.decode('utf-8')))
- # Hack to stop 2to3 converting this to next(cursor)
- current = getattr(cursor, 'next')()
- cursor.close()
- for prefix, namespace in results:
- yield prefix, URIRef(namespace)
-
- def contexts(self, triple=None):
- _from_string = self._from_string
- _to_string = self._to_string
-
- if triple:
- s, p, o = triple
- s = _to_string(s)
- p = _to_string(p)
- o = _to_string(o)
- contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o)))
- if contexts:
- for c in contexts.split(b("^")):
- if c:
- yield _from_string(c)
- else:
- index = self.__contexts
- cursor = index.cursor()
- current = cursor.first()
- cursor.close()
- while current:
- key, value = current
- context = _from_string(key)
- yield context
- cursor = index.cursor()
- try:
- cursor.set_range(key)
- # Hack to stop 2to3 converting this to next(cursor)
- current = getattr(cursor, 'next')()
- except db.DBNotFoundError:
- current = None
- cursor.close()
-
- def _from_string(self, i):
- k = self.__i2k.get(int(i))
- return self._loads(k)
-
- def _to_string(self, term, txn=None):
- k = self._dumps(term)
- i = self.__k2i.get(k, txn=txn)
- if i is None:
- # weird behavoir from bsddb not taking a txn as a keyword argument
- # for append
- if self.transaction_aware:
- i = "%s" % self.__i2k.append(k, txn)
- else:
- i = "%s" % self.__i2k.append(k)
-
- self.__k2i.put(k, i, txn=txn)
- else:
- i = i.decode()
- return i
-
- def __lookup(self, (subject, predicate, object), context, txn=None):
- _to_string = self._to_string
- if context is not None:
- context = _to_string(context, txn=txn)
- i = 0
- if subject is not None:
- i += 1
- subject = _to_string(subject, txn=txn)
- if predicate is not None:
- i += 2
- predicate = _to_string(predicate, txn=txn)
- if object is not None:
- i += 4
- object = _to_string(object, txn=txn)
- index, prefix_func, from_key, results_from_key = self.__lookup_dict[i]
- #print (subject, predicate, object), context, prefix_func, index #DEBUG
- prefix = bb("^".join(prefix_func((subject, predicate, object), context)))
- return index, prefix, from_key, results_from_key
-
-
-def to_key_func(i):
- def to_key(triple, context):
- "Takes a string; returns key"
- return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^
- return to_key
-
-def from_key_func(i):
- def from_key(key):
- "Takes a key; returns string"
- parts = key.split(b("^"))
- return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1]
- return from_key
-
-def results_from_key_func(i, from_string):
- def from_key(key, subject, predicate, object, contexts_value):
- "Takes a key and subject, predicate, object; returns tuple for yield"
- parts = key.split(b("^"))
- if subject is None:
- # TODO: i & 1: # dis assemble and/or measure to see which is faster
- # subject is None or i & 1
- s = from_string(parts[(3-i+0)%3+1])
- else:
- s = subject
- if predicate is None:#i & 2:
- p = from_string(parts[(3-i+1)%3+1])
- else:
- p = predicate
- if object is None:#i & 4:
- o = from_string(parts[(3-i+2)%3+1])
- else:
- o = object
- return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c)
- return from_key
-
-def readable_index(i):
- s, p, o = "?" * 3
- if i & 1: s = "s"
- if i & 2: p = "p"
- if i & 4: o = "o"
- return "%s,%s,%s" % (s, p, o)