path: root/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py')
-rw-r--r-- creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py | 2314
1 file changed, 2314 insertions(+), 0 deletions(-)
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
new file mode 100644
index 0000000..ac48340
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
@@ -0,0 +1,2314 @@
+#!/usr/bin/env python
+u"""
+notation3.py - Standalone Notation3 Parser
+Derived from CWM, the Closed World Machine
+
+Authors of the original suite:
+
+* Dan Connolly <@@>
+* Tim Berners-Lee <@@>
+* Yosi Scharf <@@>
+* Joseph M. Reagle Jr. <reagle@w3.org>
+* Rich Salz <rsalz@zolera.com>
+
+http://www.w3.org/2000/10/swap/notation3.py
+
+Copyright 2000-2007, World Wide Web Consortium.
+Copyright 2001, MIT.
+Copyright 2001, Zolera Systems Inc.
+
+License: W3C Software License
+http://www.w3.org/Consortium/Legal/copyright-software
+
+Modified by Sean B. Palmer
+Copyright 2007, Sean B. Palmer. \u32E1
+
+Modified to work with rdflib by Gunnar Aastrand Grimnes
+Copyright 2010, Gunnar A. Grimnes
+
+"""
+
+# Python standard libraries
+import types
+import sys
+import os
+import string
+import re
+import time
+import StringIO
+import codecs
+
+from binascii import a2b_hex
+from decimal import Decimal
+
+from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
+from rdflib.graph import QuotedGraph, ConjunctiveGraph
+from rdflib import py3compat
+b = py3compat.b
+
+__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"]
+
+from rdflib.parser import Parser
+
+# Incestuous.. would be nice to separate N3 and XML
+# from sax2rdf import XMLtoDOM
+def XMLtoDOM(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+
+# SWAP http://www.w3.org/2000/10/swap
+# from diag import verbosity, setVerbosity, progress
+def verbosity(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+def setVerbosity(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+def progress(*args, **kargs):
+ # print >> sys.stderr, args, kargs
+ pass
+
+
+
+def splitFrag(uriref):
+ """split a URI reference between the fragment and the rest.
+
+ Punctuation is thrown away.
+
+ e.g.
+
+ >>> splitFrag("abc#def")
+ ('abc', 'def')
+
+ >>> splitFrag("abcdef")
+ ('abcdef', None)
+
+ """
+
+ i = uriref.rfind("#")
+ if i>= 0: return uriref[:i], uriref[i+1:]
+ else: return uriref, None
+
+def splitFragP(uriref, punct=0):
+ """split a URI reference before the fragment
+
+ Punctuation is kept.
+
+ e.g.
+
+ >>> splitFragP("abc#def")
+ ('abc', '#def')
+
+ >>> splitFragP("abcdef")
+ ('abcdef', '')
+
+ """
+
+ i = uriref.rfind("#")
+ if i>= 0: return uriref[:i], uriref[i:]
+ else: return uriref, ''
+
+@py3compat.format_doctest_out
+def join(here, there):
+ """join an absolute URI and URI reference
+ (non-ascii characters are supported/doctested;
+ haven't checked the details of the IRI spec though)
+
+ here is assumed to be absolute.
+ there is a URI reference.
+
+ >>> join('http://example/x/y/z', '../abc')
+ 'http://example/x/abc'
+
+ Raise ValueError if there uses relative path
+ syntax but here has no hierarchical path.
+
+ >>> join('mid:foo@example', '../foo')
+ Traceback (most recent call last):
+ raise ValueError, here
+ ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.
+
+ >>> join('http://example/x/y/z', '')
+ 'http://example/x/y/z'
+
+ >>> join('mid:foo@example', '#foo')
+ 'mid:foo@example#foo'
+
+ We grok IRIs
+
+ >>> len(u'Andr\\xe9')
+ 5
+
+ >>> join('http://example.org/', u'#Andr\\xe9')
+ %(u)s'http://example.org/#Andr\\xe9'
+ """
+
+ assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?)
+
+ slashl = there.find('/')
+ colonl = there.find(':')
+
+ # join(base, 'foo:/') -- absolute
+ if colonl >= 0 and (slashl < 0 or colonl < slashl):
+ return there
+
+ bcolonl = here.find(':')
+ assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute
+
+ path, frag = splitFragP(there)
+ if not path: return here + frag
+
+ # join('mid:foo@example', '../foo') bzzt
+ if here[bcolonl+1:bcolonl+2] != '/':
+ raise ValueError("Base <%s> has no slash after colon - with relative '%s'." % (here, there))
+
+ if here[bcolonl+1:bcolonl+3] == '//':
+ bpath = here.find('/', bcolonl+3)
+ else:
+ bpath = bcolonl+1
+
+ # join('http://xyz', 'foo')
+ if bpath < 0:
+ bpath = len(here)
+ here = here + '/'
+
+ # join('http://xyz/', '//abc') => 'http://abc'
+ if there[:2] == '//':
+ return here[:bcolonl+1] + there
+
+ # join('http://xyz/', '/abc') => 'http://xyz/abc'
+ if there[:1] == '/':
+ return here[:bpath] + there
+
+ slashr = here.rfind('/')
+
+ while 1:
+ if path[:2] == './':
+ path = path[2:]
+ if path == '.':
+ path = ''
+ elif path[:3] == '../' or path == '..':
+ path = path[3:]
+ i = here.rfind('/', bpath, slashr)
+ if i >= 0:
+ here = here[:i+1]
+ slashr = i
+ else:
+ break
+
+ return here[:slashr+1] + path + frag
+
+commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')
+
+def refTo(base, uri):
+ """figure out a relative URI reference from base to uri
+
+ >>> refTo('http://example/x/y/z', 'http://example/x/abc')
+ '../abc'
+
+ >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
+ 'q/r#s'
+
+ >>> refTo(None, 'http://ex/x/y')
+ 'http://ex/x/y'
+
+ >>> refTo('http://ex/x/y', 'http://ex/x/y')
+ ''
+
+ Note the relationship between refTo and join:
+ join(x, refTo(x, y)) == y
+ which points out certain strings which cannot be URIs. e.g.
+ >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
+ 0
+
+ So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
+ >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
+ 1
+
+ This checks that a root-relative reference is used when the root is all
+ the two URIs share. Root-relative references are now used whenever no
+ path is shared; this is a matter of taste but tends to give more
+ resilience -- and shorter paths.
+
+ Note that base may be None, meaning no base. In some situations there
+ just is no base; such is life. In these cases, refTo returns the
+ absolute value. The axiom abs(,rel(b,x))=x still holds.
+ This saves people having to set the base to "bogus:".
+
+ >>> refTo('http://ex/x/y/z', 'http://ex/r')
+ '/r'
+
+ """
+
+# assert base # don't mask bugs -danc # not a bug. -tim
+ if not base: return uri
+ if base == uri: return ""
+
+ # Find how many path segments in common
+ i=0
+ while i<len(uri) and i<len(base):
+ if uri[i] == base[i]: i = i + 1
+ else: break
+ # print "# relative", base, uri, " same up to ", i
+ # i point to end of shortest one or first difference
+
+ m = commonHost.match(base[:i])
+ if m:
+ k=uri.find("//")
+ if k<0: k=-2 # no host
+ l=uri.find("/", k+2)
+ if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]:
+ return uri[l:]
+
+ if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base
+
+ while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash
+
+ if i < 3: return uri # No way.
+ if base.find("//", i-2)>0 \
+ or uri.find("//", i-2)>0: return uri # An unshared "//"
+ if base.find(":", i)>0: return uri # An unshared ":"
+ n = base.count("/", i)
+ if n == 0 and i<len(uri) and uri[i] == '#':
+ return "./" + uri[i:]
+ elif n == 0 and i == len(uri):
+ return "./"
+ else:
+ return ("../" * n) + uri[i:]
+
+
+def base():
+ """The base URI for this process - the Web equiv of cwd
+
+ Relative or absolute unix-standard filenames parsed relative to
+ this yield the URI of the file.
+ If we had a reliable way of getting a computer name,
+ we should put it in the hostname just to prevent ambiguity
+
+ """
+# return "file://" + hostname + os.getcwd() + "/"
+ return "file://" + _fixslash(os.getcwd()) + "/"
+
+
+def _fixslash(str):
+ """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
+ s = str
+ for i in range(len(s)):
+ if s[i] == "\\": s = s[:i] + "/" + s[i+1:]
+ if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present
+ return s
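+
+# Example (illustrative, not part of the original module): _fixslash turns a
+# Windows-style path into the Unix-like form used in file: URIs --
+# _fixslash("C:\tmp\data.n3") gives "/tmp/data.n3" (backslashes become
+# slashes and the drive-letter prefix is stripped), while an already-Unix
+# path such as "/ex/x" is returned unchanged.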
+
+URI_unreserved = b("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
+ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
+@py3compat.format_doctest_out
+def canonical(str_in):
+ """Convert equivalent URIs (or parts) to the same string
+
+ There are many different levels of URI canonicalization
+ which are possible. See http://www.ietf.org/rfc/rfc3986.txt
+ Done:
+ - Converting unicode IRI to utf-8
+ - Escaping all non-ASCII
+ - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
+ hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
+ - Making all escapes uppercase hexadecimal
+
+ Not done:
+ - Making URI scheme lowercase
+ - changing /./ or /foo/../ to / with care not to change host part
+
+
+ >>> canonical("foo bar")
+ %(b)s'foo%%20bar'
+
+ >>> canonical(u'http:')
+ %(b)s'http:'
+
+ >>> canonical('fran%%c3%%83%%c2%%a7ois')
+ %(b)s'fran%%C3%%83%%C2%%A7ois'
+
+ >>> canonical('a')
+ %(b)s'a'
+
+ >>> canonical('%%4e')
+ %(b)s'N'
+
+ >>> canonical('%%9d')
+ %(b)s'%%9D'
+
+ >>> canonical('%%2f')
+ %(b)s'%%2F'
+
+ >>> canonical('%%2F')
+ %(b)s'%%2F'
+
+ """
+ if type(str_in) == type(u''):
+ s8 = str_in.encode('utf-8')
+ else:
+ s8 = str_in
+ s = b('')
+ i = 0
+ while i < len(s8):
+ if py3compat.PY3:
+ n = s8[i]; ch = bytes([n])
+ else:
+ ch = s8[i]; n = ord(ch)
+ if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
+ s += b("%%%02X" % ord(ch))
+ elif ch == b('%') and i+2 < len(s8):
+ ch2 = a2b_hex(s8[i+1:i+3])
+ if ch2 in URI_unreserved: s += ch2
+ else: s += b("%%%02X" % ord(ch2))
+ i = i+3
+ continue
+ else:
+ s += ch
+ i = i +1
+ return s
+
+
+
+
+
+
+CONTEXT = 0
+PRED = 1
+SUBJ = 2
+OBJ = 3
+
+PARTS = PRED, SUBJ, OBJ
+ALL4 = CONTEXT, PRED, SUBJ, OBJ
+
+SYMBOL = 0
+FORMULA = 1
+LITERAL = 2
+LITERAL_DT = 21
+LITERAL_LANG = 22
+ANONYMOUS = 3
+XMLLITERAL = 25
+
+Logic_NS = "http://www.w3.org/2000/10/swap/log#"
+NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging
+forSomeSym = Logic_NS + "forSome"
+forAllSym = Logic_NS + "forAll"
+
+RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+OWL_NS = "http://www.w3.org/2002/07/owl#"
+DAML_sameAs_URI = OWL_NS+"sameAs"
+parsesTo_URI = Logic_NS + "parsesTo"
+RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"
+
+List_NS = RDF_NS_URI # From 20030808
+_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"
+
+N3_first = (SYMBOL, List_NS + "first")
+N3_rest = (SYMBOL, List_NS + "rest")
+N3_li = (SYMBOL, List_NS + "li")
+N3_nil = (SYMBOL, List_NS + "nil")
+N3_List = (SYMBOL, List_NS + "List")
+N3_Empty = (SYMBOL, List_NS + "Empty")
+
+
+
+runNamespaceValue = None
+
+def runNamespace():
+ "Return a URI suitable as a namespace for run-local objects"
+ # @@@ include hostname (privacy?) (hash it?)
+ global runNamespaceValue
+ if runNamespaceValue == None:
+ runNamespaceValue = join(base(), _unique_id()) + '#'
+ return runNamespaceValue
+
+nextu = 0
+def uniqueURI():
+ "A unique URI"
+ global nextu
+ nextu += 1
+ return runNamespace() + "u_" + str(nextu)
+
+class URISyntaxError(ValueError):
+ """A parameter is passed to a routine that requires a URI reference"""
+ pass
+
+
+tracking = False
+chatty_flag = 50
+
+
+from xml.dom import Node
+try:
+ from xml.ns import XMLNS
+except:
+ class XMLNS:
+ BASE = "http://www.w3.org/2000/xmlns/"
+ XML = "http://www.w3.org/XML/1998/namespace"
+
+
+_attrs = lambda E: (E.attributes and E.attributes.values()) or []
+_children = lambda E: E.childNodes or []
+_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
+_inclusive = lambda n: n.unsuppressedPrefixes == None
+
+# Does a document/PI have lesser/greater document order than the
+# first element?
+_LesserElement, _Element, _GreaterElement = range(3)
+
+def _sorter(n1,n2):
+ '''_sorter(n1,n2) -> int
+ Sorting predicate for non-NS attributes.'''
+
+ i = cmp(n1.namespaceURI, n2.namespaceURI)
+ if i: return i
+ return cmp(n1.localName, n2.localName)
+
+
+def _sorter_ns(n1,n2):
+ '''_sorter_ns((n,v),(n,v)) -> int
+ "(an empty namespace URI is lexicographically least)."'''
+
+ if n1[0] == 'xmlns': return -1
+ if n2[0] == 'xmlns': return 1
+ return cmp(n1[0], n2[0])
+
+def _utilized(n, node, other_attrs, unsuppressedPrefixes):
+ '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
+ Return true if that nodespace is utilized within the node'''
+
+ if n.startswith('xmlns:'):
+ n = n[6:]
+ elif n.startswith('xmlns'):
+ n = n[5:]
+ if (n=="" and node.prefix in ["#default", None]) or \
+ n == node.prefix or n in unsuppressedPrefixes:
+ return 1
+ for attr in other_attrs:
+ if n == attr.prefix: return 1
+ return 0
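+
+# For example (illustrative): for an element written <a:e xmlns:a="...">,
+# _utilized("xmlns:a", node, [], []) returns 1 because "a" is the element's
+# own prefix, whereas a declaration whose prefix is used by neither the
+# element nor its attributes returns 0 and is dropped under exclusive C14N.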
+
+#_in_subset = lambda subset, node: not subset or node in subset
+_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak
+
+class _implementation:
+ '''Implementation class for C14N. This accompanies a node during its
+ processing and includes the parameters and processing state.'''
+
+ # Handler for each node type; populated during module instantiation.
+ handlers = {}
+
+ def __init__(self, node, write, **kw):
+ '''Create and run the implementation.'''
+ self.write = write
+ self.subset = kw.get('subset')
+ self.comments = kw.get('comments', 0)
+ self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
+ nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
+
+ # Processing state.
+ self.state = (nsdict, {'xml':''}, {}) #0422
+
+ if node.nodeType == Node.DOCUMENT_NODE:
+ self._do_document(node)
+ elif node.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ if not _inclusive(self):
+ self._do_element(node)
+ else:
+ inherited = self._inherit_context(node)
+ self._do_element(node, inherited)
+ elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ elif node.nodeType == Node.TEXT_NODE:
+ self._do_text(node)
+ else:
+ raise TypeError(str(node))
+
+
+ def _inherit_context(self, node):
+ '''_inherit_context(self, node) -> list
+ Scan ancestors of attribute and namespace context. Used only
+ for single element node canonicalization, not for subset
+ canonicalization.'''
+
+ # Collect the initial list of xml:foo attributes.
+ xmlattrs = filter(_IN_XML_NS, _attrs(node))
+
+ # Walk up and get all xml:XXX attributes we inherit.
+ inherited, parent = [], node.parentNode
+ while parent and parent.nodeType == Node.ELEMENT_NODE:
+ for a in filter(_IN_XML_NS, _attrs(parent)):
+ n = a.localName
+ if n not in xmlattrs:
+ xmlattrs.append(n)
+ inherited.append(a)
+ parent = parent.parentNode
+ return inherited
+
+
+ def _do_document(self, node):
+ '''_do_document(self, node) -> None
+ Process a document node. documentOrder holds whether the document
+ element has been encountered such that PIs/comments can be written
+ as specified.'''
+
+ self.documentOrder = _LesserElement
+ for child in node.childNodes:
+ if child.nodeType == Node.ELEMENT_NODE:
+ self.documentOrder = _Element # At document element
+ self._do_element(child)
+ self.documentOrder = _GreaterElement # After document element
+ elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
+ self._do_pi(child)
+ elif child.nodeType == Node.COMMENT_NODE:
+ self._do_comment(child)
+ elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
+ pass
+ else:
+ raise TypeError(str(child))
+ handlers[Node.DOCUMENT_NODE] = _do_document
+
+
+ def _do_text(self, node):
+ '''_do_text(self, node) -> None
+ Process a text or CDATA node. Render various special characters
+ as their C14N entity representations.'''
+ if not _in_subset(self.subset, node): return
+ s = node.data.replace("&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace(">", "&gt;")
+ s = s.replace("\015", "&#xD;")
+ if s: self.write(s)
+ handlers[Node.TEXT_NODE] = _do_text
+ handlers[Node.CDATA_SECTION_NODE] = _do_text
+
+
+ def _do_pi(self, node):
+ '''_do_pi(self, node) -> None
+ Process a PI node. Render a leading or trailing #xA if the
+ document order of the PI is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<?')
+ W(node.nodeName)
+ s = node.data
+ if s:
+ W(' ')
+ W(s)
+ W('?>')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
+
+
+ def _do_comment(self, node):
+ '''_do_comment(self, node) -> None
+ Process a comment node. Render a leading or trailing #xA if the
+ document order of the comment is greater or lesser (respectively)
+ than the document element.
+ '''
+ if not _in_subset(self.subset, node): return
+ if self.comments:
+ W = self.write
+ if self.documentOrder == _GreaterElement: W('\n')
+ W('<!--')
+ W(node.data)
+ W('-->')
+ if self.documentOrder == _LesserElement: W('\n')
+ handlers[Node.COMMENT_NODE] = _do_comment
+
+
+ def _do_attr(self, n, value):
+ '''_do_attr(self, n, value) -> None
+ Process an attribute.'''
+
+ W = self.write
+ W(' ')
+ W(n)
+ W('="')
+ s = value.replace("&", "&amp;")
+ s = s.replace("<", "&lt;")
+ s = s.replace('"', '&quot;')
+ s = s.replace('\011', '&#x9;')
+ s = s.replace('\012', '&#xA;')
+ s = s.replace('\015', '&#xD;')
+ W(s)
+ W('"')
+
+
+ def _do_element(self, node, initial_other_attrs = []):
+ '''_do_element(self, node, initial_other_attrs = []) -> None
+ Process an element (and its children).'''
+
+ # Get state (from the stack) make local copies.
+ # ns_parent -- NS declarations in parent
+ # ns_rendered -- NS nodes rendered by ancestors
+ # ns_local -- NS declarations relevant to this element
+ # xml_attrs -- Attributes in XML namespace from parent
+ # xml_attrs_local -- Local attributes in XML namespace.
+ ns_parent, ns_rendered, xml_attrs = \
+ self.state[0], self.state[1].copy(), self.state[2].copy() #0422
+ ns_local = ns_parent.copy()
+ xml_attrs_local = {}
+
+ # progress("_do_element node.nodeName=", node.nodeName)
+ # progress("_do_element node.namespaceURI", node.namespaceURI)
+ # progress("_do_element node.tocml()", node.toxml())
+ # Divide attributes into NS, XML, and others.
+ other_attrs = initial_other_attrs[:]
+ in_subset = _in_subset(self.subset, node)
+ for a in _attrs(node):
+ # progress("\t_do_element a.nodeName=", a.nodeName)
+ if a.namespaceURI == XMLNS.BASE:
+ n = a.nodeName
+ if n == "xmlns:": n = "xmlns" # DOM bug workaround
+ ns_local[n] = a.nodeValue
+ elif a.namespaceURI == XMLNS.XML:
+ if _inclusive(self) or in_subset:
+ xml_attrs_local[a.nodeName] = a #0426
+ else:
+ other_attrs.append(a)
+ #add local xml:foo attributes to ancestor's xml:foo attributes
+ xml_attrs.update(xml_attrs_local)
+
+ # Render the node
+ W, name = self.write, None
+ if in_subset:
+ name = node.nodeName
+ W('<')
+ W(name)
+
+ # Create list of NS attributes to render.
+ ns_to_render = []
+ for n,v in ns_local.items():
+
+ # If default namespace is XMLNS.BASE or empty,
+ # and if an ancestor was the same
+ if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
+ and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
+ continue
+
+ # "omit namespace node with local name xml, which defines
+ # the xml prefix, if its string value is
+ # http://www.w3.org/XML/1998/namespace."
+ if n in ["xmlns:xml", "xml"] \
+ and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
+ continue
+
+
+ # If not previously rendered
+ # and it's inclusive or utilized
+ if (n,v) not in ns_rendered.items() \
+ and (_inclusive(self) or \
+ _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
+ ns_to_render.append((n, v))
+
+ # Sort and render the ns, marking what was rendered.
+ ns_to_render.sort(_sorter_ns)
+ for n,v in ns_to_render:
+ self._do_attr(n, v)
+ ns_rendered[n]=v #0417
+
+ # If exclusive or the parent is in the subset, add the local xml attributes
+ # Else, add all local and ancestor xml attributes
+ # Sort and render the attributes.
+ if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426
+ other_attrs.extend(xml_attrs_local.values())
+ else:
+ other_attrs.extend(xml_attrs.values())
+ other_attrs.sort(_sorter)
+ for a in other_attrs:
+ self._do_attr(a.nodeName, a.value)
+ W('>')
+
+ # Push state, recurse, pop state.
+ state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
+ for c in _children(node):
+ _implementation.handlers[c.nodeType](self, c)
+ self.state = state
+
+ if name: W('</%s>' % name)
+ handlers[Node.ELEMENT_NODE] = _do_element
+
+
+def Canonicalize(node, output=None, **kw):
+ '''Canonicalize(node, output=None, **kw) -> UTF-8
+
+ Canonicalize a DOM document/element node and all descendants.
+ Return the text; if output is specified then output.write will
+ be called to output the text and None will be returned
+ Keyword parameters:
+ nsdict -- a dictionary of prefix:uri namespace entries
+ assumed to exist in the surrounding context
+ comments -- keep comments if non-zero (default is 0)
+ subset -- Canonical XML subsetting resulting from XPath (default is [])
+ unsuppressedPrefixes -- do exclusive C14N, and this specifies the
+ prefixes that should be inherited.
+ '''
+ if output:
+ _implementation(node, output.write, **kw)
+ else:
+ s = StringIO.StringIO()
+ _implementation(node, s.write, **kw)
+ return s.getvalue()
+
+# end of xmlC14n.py
+
+# from why import BecauseOfData, becauseSubexpression
+def BecauseOfData(*args, **kargs):
+ # print args, kargs
+ pass
+def becauseSubexpression(*args, **kargs):
+ # print args, kargs
+ pass
+
+N3_forSome_URI = forSomeSym
+N3_forAll_URI = forAllSym
+
+# Magic resources we know about
+
+
+
+ADDED_HASH = "#" # Stop where we use this in case we want to remove it!
+# This is the hash on namespace URIs
+
+RDF_type = ( SYMBOL , RDF_type_URI )
+DAML_sameAs = ( SYMBOL, DAML_sameAs_URI )
+
+LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"
+
+BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
+DECIMAL_DATATYPE = _XSD_PFX + "decimal"
+DOUBLE_DATATYPE = _XSD_PFX + "double"
+FLOAT_DATATYPE = _XSD_PFX + "float"
+INTEGER_DATATYPE = _XSD_PFX + "integer"
+
+option_noregen = 0 # If set, do not regenerate genids on output
+
+# @@ I18n - the notname chars need extending for well known unicode non-text
+# characters. The XML spec switched to assuming unknown things were name
+# characters.
+# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
+_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/
+_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/
+_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+
+
+N3CommentCharacter = "#" # For unix script #! compatibility
+
+########################################## Parse string to sink
+#
+# Regular expressions:
+eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment
+eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. w/comment
+ws = re.compile(r'[ \t]*') # Whitespace not including NL
+signed_integer = re.compile(r'[-+]?[0-9]+') # integer
+number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?')
+digitstring = re.compile(r'[0-9]+') # Unsigned integer
+interesting = re.compile(r'[\\\r\n\"]')
+langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')
+#"
+
+
+
+class SinkParser:
+ def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
+ genPrefix = "", flags="",
+ why=None):
+ """ note: namespace names should *not* end in #;
+ the # will get added during qname processing """
+
+ self._bindings = {}
+ self._flags = flags
+ if thisDoc != "":
+ assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
+ self._bindings[""] = thisDoc + "#" # default
+
+ self._store = store
+ if genPrefix: store.setGenPrefix(genPrefix) # pass it on
+
+ self._thisDoc = thisDoc
+ self.lines = 0 # for error handling
+ self.startOfLine = 0 # For calculating character number
+ self._genPrefix = genPrefix
+ self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ]
+ self.keywordsSet = 0 # Only then can other words be considered qnames
+ self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term
+ self._variables = {}
+ self._parentVariables = {}
+ self._reason = why # Why the parser was asked to parse this
+
+ self._reason2 = None # Why these triples
+ # was: diag.tracking
+ if tracking: self._reason2 = BecauseOfData(
+ store.newSymbol(thisDoc), because=self._reason)
+
+ if baseURI: self._baseURI = baseURI
+ else:
+ if thisDoc:
+ self._baseURI = thisDoc
+ else:
+ self._baseURI = None
+
+ assert not self._baseURI or ':' in self._baseURI
+
+ if not self._genPrefix:
+ if self._thisDoc: self._genPrefix = self._thisDoc + "#_g"
+ else: self._genPrefix = uniqueURI()
+
+ if openFormula ==None:
+ if self._thisDoc:
+ self._formula = store.newFormula(thisDoc + "#_formula")
+ else:
+ self._formula = store.newFormula()
+ else:
+ self._formula = openFormula
+
+
+ self._context = self._formula
+ self._parentContext = None
+
+
+ def here(self, i):
+ """String generated from position in file
+
+ This is for repeatability when referring people to bnodes in a document.
+ This has diagnostic uses less formally, as it should point one to which
+ bnode the arbitrary identifier actually is. It gives the
+ line and character number of the '[' character or path character
+ which introduced the blank node. The first blank node is boringly _L1C1.
+ It used to be used only for tracking, but for tests in general
+ it makes the canonical ordering of bnodes repeatable."""
+
+ return "%s_L%iC%i" % (self._genPrefix , self.lines,
+ i - self.startOfLine + 1)
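+
+ # For example (illustrative): with genPrefix "http://ex/doc#_g", the
+ # bnode introduced by the first '[' in a document gets a label of the
+ # form "http://ex/doc#_g_L1C1" (line and column of the bracket).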
+
+ def formula(self):
+ return self._formula
+
+ def loadStream(self, stream):
+ return self.loadBuf(stream.read()) # Not ideal
+
+ def loadBuf(self, buf):
+ """Parses a buffer and returns its top level formula"""
+ self.startDoc()
+
+ self.feed(buf)
+ return self.endDoc() # self._formula
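+
+ # Typical driver (a minimal sketch; "doc.n3" is a hypothetical file):
+ #
+ # parser = SinkParser(sink, baseURI="file:///tmp/doc.n3")
+ # formula = parser.loadBuf(open("doc.n3").read())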
+
+
+ def feed(self, octets):
+ """Feed an octet stream tothe parser
+
+ if BadSyntax is raised, the string
+ passed in the exception object is the
+ remainder after any statements have been parsed.
+ So if there is more data to feed to the
+ parser, it should be straightforward to recover."""
+
+ if not isinstance(octets, unicode):
+ s = octets.decode('utf-8')
+ # NB already decoded, so \ufeff
+ if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
+ s = s[1:]
+ else:
+ s=octets
+
+ i = 0
+ while i >= 0:
+ j = self.skipSpace(s, i)
+ if j<0: return
+
+ i = self.directiveOrStatement(s,j)
+ if i<0:
+ print "# next char: ", `s[j]`
+ raise BadSyntax(self._thisDoc, self.lines, s, j,
+ "expected directive or statement")
+
+ def directiveOrStatement(self, str,h):
+
+ i = self.skipSpace(str, h)
+ if i<0: return i # EOF
+
+ j = self.directive(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ j = self.statement(str, i)
+ if j>=0: return self.checkDot(str,j)
+
+ return j
+
+
+ #@@I18N
+ global _notNameChars
+ #_namechars = string.lowercase + string.uppercase + string.digits + '_-'
+
+ def tok(self, tok, str, i):
+ """Check for keyword. Space must have been stripped on entry and
+ we must not be at end of file."""
+
+ assert tok[0] not in _notNameChars # not for punctuation
+ if str[i:i+1] == "@":
+ i = i+1
+ else:
+ if tok not in self.keywords:
+ return -1 # No, this has neither keywords declaration nor "@"
+
+ if (str[i:i+len(tok)] == tok
+ and (str[i+len(tok)] in _notQNameChars )):
+ i = i + len(tok)
+ return i
+ else:
+ return -1
+
+ def directive(self, str, i):
+ j = self.skipSpace(str, i)
+ if j<0: return j # eof
+ res = []
+
+ j = self.tok('bind', str, i) # implied "#". Obsolete.
+ if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "keyword bind is obsolete: use @prefix")
+
+ j = self.tok('keywords', str, i)
+ if j>0:
+ i = self.commaSeparatedList(str, j, res, self.bareWord)
+ if i < 0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "'@keywords' needs comma separated list of words")
+ self.setKeywords(res[:])
+ # was: diag.chatty_flag
+ if chatty_flag > 80: progress("Keywords ", self.keywords)
+ return i
+
+
+ j = self.tok('forAll', str, i)
+ if j > 0:
+ i = self.commaSeparatedList(str, j, res, self.uri_ref2)
+ if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad variable list after @forAll")
+ for x in res:
+ #self._context.declareUniversal(x)
+ if x not in self._variables or x in self._parentVariables:
+ self._variables[x] = self._context.newUniversal(x)
+ return i
+
+ j = self.tok('forSome', str, i)
+ if j > 0:
+ i = self. commaSeparatedList(str, j, res, self.uri_ref2)
+ if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad variable list after @forSome")
+ for x in res:
+ self._context.declareExistential(x)
+ return i
+
+
+ j=self.tok('prefix', str, i) # no implied "#"
+ if j>=0:
+ t = []
+ i = self.qname(str, j, t)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected qname after @prefix")
+ j = self.uri_ref2(str, i, t)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected <uriref> after @prefix _qname_")
+ ns = self.uriOf(t[1])
+
+ if self._baseURI:
+ ns = join(self._baseURI, ns)
+ elif ":" not in ns:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "With no base URI, cannot use relative URI in @prefix <"+ns+">")
+ assert ':' in ns # must be absolute
+ self._bindings[t[0][0]] = ns
+ self.bind(t[0][0], hexify(ns))
+ return j
+
+ j=self.tok('base', str, i) # Added 2007/7/7
+ if j >= 0:
+ t = []
+ i = self.uri_ref2(str, j, t)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected <uri> after @base ")
+ ns = self.uriOf(t[0])
+
+ if self._baseURI:
+ ns = join(self._baseURI, ns)
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "With no previous base URI, cannot use relative URI in @base <"+ns+">")
+ assert ':' in ns # must be absolute
+ self._baseURI = ns
+ return i
+
+ return -1 # Not a directive, could be something else.
+
+ def bind(self, qn, uri):
+ assert isinstance(uri,
+ types.StringType), "Any unicode must be %x-encoded already"
+ if qn == "":
+ self._store.setDefaultNamespace(uri)
+ else:
+ self._store.bind(qn, uri)
+
+ def setKeywords(self, k):
+ "Takes a list of strings"
+ if k == None:
+ self.keywordsSet = 0
+ else:
+ self.keywords = k
+ self.keywordsSet = 1
+
+
+ def startDoc(self):
+ # was: self._store.startDoc()
+ self._store.startDoc(self._formula)
+
+ def endDoc(self):
+ """Signal end of document and stop parsing. returns formula"""
+ self._store.endDoc(self._formula) # don't canonicalize yet
+ return self._formula
+
+ def makeStatement(self, quadruple):
+ #$$$$$$$$$$$$$$$$$$$$$
+# print "# Parser output: ", `quadruple`
+ self._store.makeStatement(quadruple, why=self._reason2)
+
+
+
+ def statement(self, str, i):
+ r = []
+
+ i = self.object(str, i, r) # Allow literal for subject - extends RDF
+ if i<0: return i
+
+ j = self.property_list(str, i, r[0])
+
+ if j<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected propertylist")
+ return j
+
+ def subject(self, str, i, res):
+ return self.item(str, i, res)
+
+ def verb(self, str, i, res):
+ """ has _prop_
+ is _prop_ of
+ a
+ =
+ _prop_
+ >- prop ->
+ <- prop -<
+ _operator_"""
+
+ j = self.skipSpace(str, i)
+ if j<0:return j # eof
+
+ r = []
+
+ j = self.tok('has', str, i)
+ if j>=0:
+ i = self.prop(str, j, r)
+ if i < 0: raise BadSyntax(self._thisDoc, self.lines,
+ str, j, "expected property after 'has'")
+ res.append(('->', r[0]))
+ return i
+
+ j = self.tok('is', str, i)
+ if j>=0:
+ i = self.prop(str, j, r)
+ if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "expected <property> after 'is'")
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "End of file found, expected property after 'is'")
+ return j # eof
+ i=j
+ j = self.tok('of', str, i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected 'of' after 'is' <prop>")
+ res.append(('<-', r[0]))
+ return j
+
+ j = self.tok('a', str, i)
+ if j>=0:
+ res.append(('->', RDF_type))
+ return j
+
+
+ if str[i:i+2] == "<=":
+ res.append(('<-', self._store.newSymbol(Logic_NS+"implies")))
+ return i+2
+
+ if str[i:i+1] == "=":
+ if str[i+1:i+2] == ">":
+ res.append(('->', self._store.newSymbol(Logic_NS+"implies")))
+ return i+2
+ res.append(('->', DAML_sameAs))
+ return i+1
+
+ if str[i:i+2] == ":=":
+ # patch file relates two formulae, uses this @@ really?
+ res.append(('->', Logic_NS+"becomes"))
+ return i+2
+
+ j = self.prop(str, i, r)
+ if j >= 0:
+ res.append(('->', r[0]))
+ return j
+
+ if str[i:i+2] == ">-" or str[i:i+2] == "<-":
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ ">- ... -> syntax is obsolete.")
+
+ return -1
+
+ def prop(self, str, i, res):
+ return self.item(str, i, res)
+
+ def item(self, str, i, res):
+ return self.path(str, i, res)
+
+ def blankNode(self, uri=None):
+ if "B" not in self._flags:
+ return self._context.newBlankNode(uri, why=self._reason2)
+ x = self._context.newSymbol(uri)
+ self._context.declareExistential(x)
+ return x
+
+ def path(self, str, i, res):
+ """Parse the path production.
+ """
+ j = self.nodeOrLiteral(str, i, res)
+ if j<0: return j # nope
+
+ while str[j:j+1] in "!^.": # no spaces, must follow exactly (?)
+ ch = str[j:j+1] # @@ Allow "." followed IMMEDIATELY by a node.
+ if ch == ".":
+ ahead = str[j+1:j+2]
+ if not ahead or (ahead in _notNameChars
+ and ahead not in ":?<[{("): break
+ subj = res.pop()
+ obj = self.blankNode(uri=self.here(j))
+ j = self.node(str, j+1, res)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found in middle of path syntax")
+ pred = res.pop()
+ if ch == "^": # Reverse traverse
+ self.makeStatement((self._context, pred, obj, subj))
+ else:
+ self.makeStatement((self._context, pred, subj, obj))
+ res.append(obj)
+ return j
+
+ def anonymousNode(self, ln):
+ """Remember or generate a term for one of these _: anonymous nodes"""
+ term = self._anonymousNodes.get(ln, None)
+ if term != None: return term
+ term = self._store.newBlankNode(self._context, why=self._reason2)
+ self._anonymousNodes[ln] = term
+ return term
+
+ def node(self, str, i, res, subjectAlready=None):
+ """Parse the <node> production.
+ Space is now skipped once at the beginning
+ instead of in multiple calls to self.skipSpace().
+ """
+ subj = subjectAlready
+
+ j = self.skipSpace(str,i)
+ if j<0: return j #eof
+ i=j
+ ch = str[i:i+1] # Quick 1-character checks first:
+
+ if ch == "[":
+ bnodeID = self.here(i)
+ j=self.skipSpace(str,i+1)
+ if j<0: raise BadSyntax(self._thisDoc,
+ self.lines, str, i, "EOF after '['")
+ if str[j:j+1] == "=": # Hack for "is" binding name to anon node
+ i = j+1
+ objs = []
+ j = self.objectList(str, i, objs);
+ if j>=0:
+ subj = objs[0]
+ if len(objs)>1:
+ for obj in objs:
+ self.makeStatement((self._context,
+ DAML_sameAs, subj, obj))
+ j = self.skipSpace(str, j)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF when objectList expected after [ = ")
+ if str[j:j+1] == ";":
+ j=j+1
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "objectList expected after [= ")
+
+ if subj is None:
+ subj=self.blankNode(uri= bnodeID)
+
+ i = self.property_list(str, j, subj)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "property_list expected")
+
+ j = self.skipSpace(str, i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF when ']' expected after [ <propertyList>")
+ if str[j:j+1] != "]":
+ raise BadSyntax(self._thisDoc,
+ self.lines, str, j, "']' expected")
+ res.append(subj)
+ return j+1
+
+ if ch == "{":
+ ch2 = str[i+1:i+2]
+ if ch2 == '$':
+ i += 1
+ j = i + 1
+ List = []
+ first_run = True
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "needed '$}', found end.")
+ if str[i:i+2] == '$}':
+ j = i+2
+ break
+
+ if not first_run:
+ if str[i:i+1] == ',':
+ i+=1
+ else:
+ raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected: ','")
+ else: first_run = False
+
+ item = []
+ j = self.item(str,i, item) #@@@@@ should be path, was object
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected item in set or '$}'")
+ List.append(self._store.intern(item[0]))
+ res.append(self._store.newSet(List, self._context))
+ return j
+ else:
+ j=i+1
+ oldParentContext = self._parentContext
+ self._parentContext = self._context
+ parentAnonymousNodes = self._anonymousNodes
+ grandParentVariables = self._parentVariables
+ self._parentVariables = self._variables
+ self._anonymousNodes = {}
+ self._variables = self._variables.copy()
+ reason2 = self._reason2
+ self._reason2 = becauseSubexpression
+ if subj is None: subj = self._store.newFormula()
+ self._context = subj
+
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "needed '}', found end.")
+
+ if str[i:i+1] == "}":
+ j = i+1
+ break
+
+ j = self.directiveOrStatement(str,i)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "expected statement or '}'")
+
+ self._anonymousNodes = parentAnonymousNodes
+ self._variables = self._parentVariables
+ self._parentVariables = grandParentVariables
+ self._context = self._parentContext
+ self._reason2 = reason2
+ self._parentContext = oldParentContext
+ res.append(subj.close()) # No use until closed
+ return j
+
+ if ch == "(":
+ thing_type = self._store.newList
+ ch2 = str[i+1:i+2]
+ if ch2 == '$':
+ thing_type = self._store.newSet
+ i += 1
+ j=i+1
+
+ List = []
+ while 1:
+ i = self.skipSpace(str, j)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines,
+ str, i, "needed ')', found end.")
+ if str[i:i+1] == ')':
+ j = i+1
+ break
+
+ item = []
+ j = self.item(str,i, item) #@@@@@ should be path, was object
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "expected item in list or ')'")
+ List.append(self._store.intern(item[0]))
+ res.append(thing_type(List, self._context))
+ return j
+
+ j = self.tok('this', str, i) # This context
+ if j>=0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
+ res.append(self._context)
+ return j
+
+ #booleans
+ j = self.tok('true', str, i)
+ if j>=0:
+ res.append(True)
+ return j
+ j = self.tok('false', str, i)
+ if j>=0:
+ res.append(False)
+ return j
+
+ if subj is None: # If this can be a named node, then check for a name.
+ j = self.uri_ref2(str, i, res)
+ if j >= 0:
+ return j
+
+ return -1
+
+ def property_list(self, str, i, subj):
+ """Parse property list
+ Leaves the terminating punctuation in the buffer
+ """
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF found when expected verb in property list")
+ return j #eof
+
+ if str[j:j+2] ==":-":
+ i = j + 2
+ res = []
+ j = self.node(str, i, res, subj)
+ if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad {} or () or [] node after :- ")
+ i=j
+ continue
+ i=j
+ v = []
+ j = self.verb(str, i, v)
+ if j<=0:
+ return i # void but valid
+
+ objs = []
+ i = self.objectList(str, j, objs)
+ if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "objectList expected")
+ for obj in objs:
+ dir, sym = v[0]
+ if dir == '->':
+ self.makeStatement((self._context, sym, subj, obj))
+ else:
+ self.makeStatement((self._context, sym, obj, subj))
+
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found in list of objects")
+ return j #eof
+ if str[i:i+1] != ";":
+ return i
+ i = i+1 # skip semicolon and continue
+
+ def commaSeparatedList(self, str, j, res, what):
+ """return value: -1 bad syntax; >1 new position in str
+ res has things found appended
+ """
+ i = self.skipSpace(str, j)
+ if i<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "EOF found expecting comma sep list")
+ return i
+ if str[i] == ".": return j # empty list is OK
+ i = what(str, i, res)
+ if i<0: return -1
+
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0: return j # eof
+ ch = str[j:j+1]
+ if ch != ",":
+ if ch != ".":
+ return -1
+ return j # Found but not swallowed "."
+ i = what(str, j+1, res)
+ if i<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad list content")
+ return i
+
+ def objectList(self, str, i, res):
+ i = self.object(str, i, res)
+ if i<0: return -1
+ while 1:
+ j = self.skipSpace(str, i)
+ if j<0:
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "EOF found after object")
+ return j #eof
+ if str[j:j+1] != ",":
+ return j # Found something else!
+ i = self.object(str, j+1, res)
+ if i<0: return i
+
+ def checkDot(self, str, i):
+ j = self.skipSpace(str, i)
+ if j<0: return j #eof
+ if str[j:j+1] == ".":
+ return j+1 # skip
+ if str[j:j+1] == "}":
+ return j # don't skip it
+ if str[j:j+1] == "]":
+ return j
+ raise BadSyntax(self._thisDoc, self.lines,
+ str, j, "expected '.' or '}' or ']' at end of statement")
+ return i
+
+
+ def uri_ref2(self, str, i, res):
+ """Generate uri from n3 representation.
+
+ Note that the RDF convention of directly concatenating
+ NS and local name is now used though I prefer inserting a '#'
+ to make the namespaces look more like what XML folks expect.
+ """
+ qn = []
+ j = self.qname(str, i, qn)
+ if j>=0:
+ pfx, ln = qn[0]
+ if pfx is None:
+ assert 0, "not used?"
+ ns = self._baseURI + ADDED_HASH
+ else:
+ try:
+ ns = self._bindings[pfx]
+ except KeyError:
+ if pfx == "_": # Magic prefix 2001/05/30, can be overridden
+ res.append(self.anonymousNode(ln))
+ return j
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Prefix \"%s:\" not bound" % (pfx))
+ symb = self._store.newSymbol(ns + ln)
+ if symb in self._variables:
+ res.append(self._variables[symb])
+ else:
+ res.append(symb) # @@@ "#" CONVENTION
+ if not ns.find("#"):progress(
+ "Warning: no # on namespace %s," % ns)
+ return j
+
+
+ i = self.skipSpace(str, i)
+ if i<0: return -1
+
+ if str[i] == "?":
+ v = []
+ j = self.variable(str,i,v)
+ if j>0: #Forget variables as a class, only in context.
+ res.append(v[0])
+ return j
+ return -1
+
+ elif str[i]=="<":
+ i = i + 1
+ st = i
+ while i < len(str):
+ if str[i] == ">":
+ uref = str[st:i] # the join should deal with "":
+ if self._baseURI:
+ uref = join(self._baseURI, uref) # was: uripath.join
+ else:
+ assert ":" in uref, \
+ "With no base URI, cannot deal with relative URIs"
+ if str[i-1:i]=="#" and not uref[-1:]=="#":
+ uref = uref + "#" # She meant it! Weirdness in urlparse?
+ symb = self._store.newSymbol(uref)
+ if symb in self._variables:
+ res.append(self._variables[symb])
+ else:
+ res.append(symb)
+ return i+1
+ i = i + 1
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "unterminated URI reference")
+
+ elif self.keywordsSet:
+ v = []
+ j = self.bareWord(str,i,v)
+ if j<0: return -1 #Forget variables as a class, only in context.
+ if v[0] in self.keywords:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ 'Keyword "%s" not allowed here.' % v[0])
+ res.append(self._store.newSymbol(self._bindings[""]+v[0]))
+ return j
+ else:
+ return -1
+
+ def skipSpace(self, str, i):
+ """Skip white space, newlines and comments.
+ return -1 if EOF, else position of first non-ws character"""
+ while 1:
+ m = eol.match(str, i)
+ if m == None: break
+ self.lines = self.lines + 1
+ i = m.end() # Point to first character unmatched
+ self.startOfLine = i
+ m = ws.match(str, i)
+ if m != None:
+ i = m.end()
+ m = eof.match(str, i)
+ if m != None: return -1
+ return i
+
+ def variable(self, str, i, res):
+ """ ?abc -> variable(:abc)
+ """
+
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+
+ if str[j:j+1] != "?": return -1
+ j=j+1
+ i = j
+ if str[j] in "0123456789-":
+ raise BadSyntax(self._thisDoc, self.lines, str, j,
+ "Varible name can't start with '%s'" % str[j])
+ return -1
+ while i <len(str) and str[i] not in _notNameChars:
+ i = i+1
+ if self._parentContext == None:
+ varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
+ if varURI not in self._variables:
+ self._variables[varURI] = self._context.newUniversal(varURI
+ , why=self._reason2)
+ res.append(self._variables[varURI])
+ return i
+ # @@ was:
+ # raise BadSyntax(self._thisDoc, self.lines, str, j,
+ # "Can't use ?xxx syntax for variable in outermost level: %s"
+ # % str[j-1:i])
+ varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
+ if varURI not in self._parentVariables:
+ self._parentVariables[varURI] = self._parentContext.newUniversal(varURI
+ , why=self._reason2)
+ res.append(self._parentVariables[varURI])
+ return i
+
+ def bareWord(self, str, i, res):
+ """ abc -> :abc
+ """
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+
+ if str[j] in "0123456789-" or str[j] in _notNameChars: return -1
+ i = j
+ while i <len(str) and str[i] not in _notNameChars:
+ i = i+1
+ res.append(str[j:i])
+ return i
+
+ def qname(self, str, i, res):
+ """
+ xyz:def -> ('xyz', 'def')
+ If not in keywords and keywordsSet: def -> ('', 'def')
+ :def -> ('', 'def')
+ """
+
+ i = self.skipSpace(str, i)
+ if i<0: return -1
+
+ c = str[i]
+ if c in "0123456789-+": return -1
+ if c not in _notNameChars:
+ ln = c
+ i = i + 1
+ while i < len(str):
+ c = str[i]
+ if c not in _notNameChars:
+ ln = ln + c
+ i = i + 1
+ else: break
+ else: # First character is non-alpha
+ ln = '' # Was: None - TBL (why? useful?)
+
+ if i<len(str) and str[i] == ':':
+ pfx = ln
+ i = i + 1
+ ln = ''
+ while i < len(str):
+ c = str[i]
+ if c not in _notNameChars:
+ ln = ln + c
+ i = i + 1
+ else: break
+
+ res.append((pfx, ln))
+ return i
+
+ else: # delimiter was not ":"
+ if ln and self.keywordsSet and ln not in self.keywords:
+ res.append(('', ln))
+ return i
+ return -1
+
+ def object(self, str, i, res):
+ j = self.subject(str, i, res)
+ if j>= 0:
+ return j
+ else:
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+ else: i=j
+
+ if str[i]=='"':
+ if str[i:i+3] == '"""': delim = '"""'
+ else: delim = '"'
+ i = i + len(delim)
+
+ j, s = self.strconst(str, i, delim)
+
+ res.append(self._store.newLiteral(s))
+ progress("New string const ", s, j)
+ return j
+ else:
+ return -1
+
+ def nodeOrLiteral(self, str, i, res):
+ j = self.node(str, i, res)
+ startline = self.lines # Remember where for error messages
+ if j>= 0:
+ return j
+ else:
+ j = self.skipSpace(str, i)
+ if j<0: return -1
+ else: i=j
+
+ ch = str[i]
+ if ch in "-+0987654321":
+ m = number_syntax.match(str, i)
+ if m == None:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "Bad number syntax")
+ j = m.end()
+ if m.group('exponent') != None: # includes decimal exponent
+ res.append(float(str[i:j]))
+# res.append(self._store.newLiteral(str[i:j],
+# self._store.newSymbol(FLOAT_DATATYPE)))
+ elif m.group('decimal') != None:
+ res.append(Decimal(str[i:j]))
+ else:
+ res.append(long(str[i:j]))
+# res.append(self._store.newLiteral(str[i:j],
+# self._store.newSymbol(INTEGER_DATATYPE)))
+ return j
+
+ if str[i]=='"':
+ if str[i:i+3] == '"""': delim = '"""'
+ else: delim = '"'
+ i = i + len(delim)
+
+ dt = None
+ j, s = self.strconst(str, i, delim)
+ lang = None
+ if str[j:j+1] == "@": # Language?
+ m = langcode.match(str, j+1)
+ if m == None:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "Bad language code syntax on string literal, after @")
+ i = m.end()
+ lang = str[j+1:i]
+ j = i
+ if str[j:j+2] == "^^":
+ res2 = []
+ j = self.uri_ref2(str, j+2, res2) # Read datatype URI
+ dt = res2[0]
+# if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
+ if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
+ try:
+ dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">'
+ + s
+ + '</rdf:envelope>').firstChild
+ except:
+ raise ValueError('s="%s"' % s)
+ res.append(self._store.newXMLLiteral(dom))
+ return j
+ res.append(self._store.newLiteral(s, dt, lang))
+ return j
+ else:
+ return -1
+
+ def uriOf(self, sym):
+ if isinstance(sym, types.TupleType):
+ return sym[1] # old system for --pipe
+ # return sym.uriref() # cwm api
+ return sym
+
+
+ def strconst(self, str, i, delim):
+ """parse an N3 string constant delimited by delim.
+ return index, val
+ """
+
+
+ j = i
+ ustr = u"" # Empty unicode string
+ startline = self.lines # Remember where for error messages
+ while j<len(str):
+ if str[j] == '"':
+ if delim == '"': # done when delim is "
+ i = j + 1
+ return i, ustr
+ if delim == '"""': # done when delim is """ and ...
+ if str[j:j+5] == '"""""': # ... we have "" before
+ i = j + 5
+ ustr = ustr + '""'
+ return i, ustr
+ if str[j:j+4] == '""""': # ... we have " before
+ i = j + 4
+ ustr = ustr + '"'
+ return i, ustr
+ if str[j:j+3] == '"""': # ... current " is part of delim
+ i = j + 3
+ return i, ustr
+
+ # we are inside of the string and current char is "
+ j = j + 1
+ ustr = ustr + '"'
+ continue
+
+ m = interesting.search(str, j) # was str[j:].
+ # Note for pos param to work, MUST be compiled ... re bug?
+ assert m , "Quote expected in string at ^ in %s^%s" %(
+ str[j-20:j], str[j:j+20]) # we at least have to find a quote
+
+ i = m.start()
+ try:
+ ustr = ustr + str[j:i]
+ except UnicodeError:
+ err = ""
+ for c in str[j:i]:
+ err = err + (" %02x" % ord(c))
+ streason = sys.exc_info()[1].__str__()
+ raise BadSyntax(self._thisDoc, startline, str, j,
+ "Unicode error appending characters %s to string, because\n\t%s"
+ % (err, streason))
+
+# print "@@@ i = ",i, " j=",j, "m.end=", m.end()
+
+ ch = str[i]
+ if ch == '"':
+ j = i
+ continue
+ elif ch == "\r": # Strip carriage returns
+ j = i+1
+ continue
+ elif ch == "\n":
+ if delim == '"':
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "newline found in string literal")
+ self.lines = self.lines + 1
+ ustr = ustr + ch
+ j = i + 1
+ self.startOfLine = j
+
+ elif ch == "\\":
+ j = i + 1
+ ch = str[j:j+1] # Will be empty if string ends
+ if not ch:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal (2)")
+ k = 'abfrtvn\\"'.find(ch)
+ if k >= 0:
+ uch = '\a\b\f\r\t\v\n\\"'[k]
+ ustr = ustr + uch
+ j = j + 1
+ elif ch == "u":
+ j, ch = self.uEscape(str, j+1, startline)
+ ustr = ustr + ch
+ elif ch == "U":
+ j, ch = self.UEscape(str, j+1, startline)
+ ustr = ustr + ch
+ else:
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "bad escape")
+
+ raise BadSyntax(self._thisDoc, self.lines, str, i,
+ "unterminated string literal")
+
+
+ def uEscape(self, str, i, startline):
+ j = i
+ count = 0
+ value = 0
+ while count < 4: # Get 4 more characters
+ ch = str[j:j+1].lower()
+ # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
+ j = j + 1
+ if ch == "":
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal(3)")
+ k = "0123456789abcdef".find(ch)
+ if k < 0:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "bad string literal hex escape")
+ value = value * 16 + k
+ count = count + 1
+ uch = unichr(value)
+ return j, uch
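+
+ # For example (illustrative): uEscape consumes exactly four hex digits,
+ # so with str = "00e9..." and i = 0 it returns (4, u'\xe9') -- the
+ # character encoded by the escape \u00E9.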
+
+ def UEscape(self, str, i, startline):
+ stringType = type('')
+ j = i
+ count = 0
+ value = '\\U'
+ while count < 8: # Get 8 more characters
+ ch = str[j:j+1].lower()
+ # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
+ j = j + 1
+ if ch == "":
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "unterminated string literal(3)")
+ k = "0123456789abcdef".find(ch)
+ if k < 0:
+ raise BadSyntax(self._thisDoc, startline, str, i,
+ "bad string literal hex escape")
+ value = value + ch
+ count = count + 1
+
+ uch = stringType(value).decode('unicode-escape')
+ return j, uch
+
+wide_build = True
+try:
+ unichr(0x10000)
+except ValueError:
+ wide_build = False
+
+# If we are going to do operators then they should generate
+# [ is operator:plus of ( \1 \2 ) ]
+
+
+class BadSyntax(SyntaxError):
+ def __init__(self, uri, lines, str, i, why):
+ self._str = str.encode('utf-8') # Better go back to strings for errors
+ self._i = i
+ self._why = why
+ self.lines = lines
+ self._uri = uri
+
+ def __str__(self):
+ str = self._str
+ i = self._i
+ st = 0
+ if i>60:
+ pre="..."
+ st = i - 60
+ else: pre=""
+ if len(str)-i > 60: post="..."
+ else: post=""
+
+ return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
+ % (self.lines +1, self._uri, self._why, pre,
+ str[st:i], str[i:i+60], post)
+
+
+
+def stripCR(str):
+ res = ""
+ for ch in str:
+ if ch != "\r":
+ res = res + ch
+ return res
+
+def dummyWrite(x):
+ pass
+
+################################################################################
+
+
+def toBool(s):
+ if s == 'true' or s == 'True' or s == '1':
+ return True
+ if s == 'false' or s == 'False' or s == '0':
+ return False
+ raise ValueError(s)
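+
+# For example (illustrative): toBool('true'), toBool('True') and toBool('1')
+# return True; toBool('false'), toBool('False') and toBool('0') return False;
+# any other string raises ValueError.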
+
+
+
+
+
+class Formula(object):
+ number = 0
+
+ def __init__(self, parent):
+ self.counter = 0
+ Formula.number += 1
+ self.number = Formula.number
+ self.existentials = {}
+ self.universals = {}
+
+ self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id())
+
+ def __str__(self):
+ return '_:Formula%s' % self.number
+
+ def id(self):
+ return BNode('_:Formula%s' % self.number)
+
+ def newBlankNode(self, uri=None, why=None):
+ if uri is None:
+ self.counter += 1
+ b = BNode('f%sb%s' % (id(self), self.counter))
+ else: b = BNode(uri.split('#').pop().replace('_', 'b'))
+ return b
+
+ def newUniversal(self, uri, why=None):
+ return Variable(uri.split('#').pop())
+
+ def declareExistential(self, x):
+ self.existentials[x] = self.newBlankNode()
+
+ def close(self):
+
+ return self.quotedgraph
+
+r_hibyte = re.compile(r'([\x80-\xff])')
+def iri(uri):
+ return uri.decode('utf-8')
+ # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri))
+
+class RDFSink(object):
+ def __init__(self, graph):
+ self.rootFormula = None
+ self.counter = 0
+ self.graph=graph
+
+
+ def newFormula(self):
+ assert self.graph.store.formula_aware
+ f = Formula(self.graph)
+ return f
+
+ def newSymbol(self, *args):
+ uri = args[0].encode('utf-8')
+ return URIRef(iri(uri))
+
+ def newBlankNode(self, arg=None, **kargs):
+ if isinstance(arg, Formula):
+ return arg.newBlankNode()
+ elif arg is None:
+ self.counter += 1
+ b = BNode('n' + str(self.counter))
+ else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
+ return b
+
+ def newLiteral(self, s, dt, lang):
+ if dt: return Literal(s, datatype=dt)
+ else: return Literal(s, lang=lang)
+
+ def newList(self, n, f):
+ if not n:
+ return self.newSymbol(
+ 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
+ )
+
+ a = self.newBlankNode(f)
+ first = self.newSymbol(
+ 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
+ )
+ rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
+ self.makeStatement((f, first, a, n[0]))
+ self.makeStatement((f, rest, a, self.newList(n[1:], f)))
+ return a
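+
+ # Worked example (illustrative): newList([x, y], f) allocates a bnode
+ # _:b1 with _:b1 rdf:first x and _:b1 rdf:rest _:b2, where _:b2 holds y
+ # and _:b2 rdf:rest is rdf:nil -- the usual cons-cell encoding of an RDF
+ # collection. (makeStatement quadruples are (context, pred, subj, obj).)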
+
+ def newSet(self, *args):
+ return set(args)
+
+ def setDefaultNamespace(self, *args):
+ return ':'.join(repr(n) for n in args)
+
+ def makeStatement(self, quadruple, why=None):
+ f, p, s, o = quadruple
+
+ if hasattr(p, 'formula'):
+ raise Exception("Formula used as predicate")
+
+ s = self.normalise(f, s)
+ p = self.normalise(f, p)
+ o = self.normalise(f, o)
+
+
+ if f == self.rootFormula:
+ # print s, p, o, '.'
+ self.graph.add((s, p, o))
+ else:
+ f.quotedgraph.add((s,p,o))
+
+
+ #return str(quadruple)
+
+ def normalise(self, f, n):
+ if isinstance(n, tuple):
+ return URIRef(unicode(n[1]))
+
+ # if isinstance(n, list):
+ # rdflist, f = n
+ # name = self.newBlankNode()
+ # if f == self.rootFormula:
+ # sublist = name
+ # for i in xrange(0, len(rdflist) - 1):
+ # print sublist, 'first', rdflist[i]
+ # rest = self.newBlankNode()
+ # print sublist, 'rest', rest
+ # sublist = rest
+ # print sublist, 'first', rdflist[-1]
+ # print sublist, 'rest', 'nil'
+ # return name
+
+ if isinstance(n, bool):
+ s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
+ return s
+
+ if isinstance(n, int) or isinstance(n, long):
+ s = Literal(unicode(n), datatype=INTEGER_DATATYPE)
+ return s
+
+ if isinstance(n, Decimal):
+ value = str(n.normalize())
+ if value == '-0':
+ value = '0'
+ s = Literal(value, datatype=DECIMAL_DATATYPE )
+ return s
+
+ if isinstance(n, float):
+ s = Literal(str(n), datatype=DOUBLE_DATATYPE )
+ return s
+
+ if f.existentials.has_key(n):
+ return f.existentials[n]
+
+ # if isinstance(n, Var):
+ # if f.universals.has_key(n):
+ # return f.universals[n]
+ # f.universals[n] = f.newBlankNode()
+ # return f.universals[n]
+
+ return n
+
+ def intern(self, something):
+ return something
+
+ def bind(self, pfx, uri):
+ pass # print pfx, ':', uri
+
+ def startDoc(self, formula):
+ self.rootFormula = formula
+
+ def endDoc(self, formula):
+ pass
+
+
+###################################################
+#
+# Utilities
+#
+
+Escapes = {'a': '\a',
+ 'b': '\b',
+ 'f': '\f',
+ 'r': '\r',
+ 't': '\t',
+ 'v': '\v',
+ 'n': '\n',
+ '\\': '\\',
+ '"': '"'}
+
+forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]')
+forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]')
+#"
+def stringToN3(str, singleLine=0, flags=""):
+ res = ''
+ if (len(str) > 20 and
+ str[-1] != '"' and
+ not singleLine and
+ (str.find("\n") >=0
+ or str.find('"') >=0)):
+ delim= '"""'
+ forbidden = forbidden1 # (allow tabs too now)
+ else:
+ delim = '"'
+ forbidden = forbidden2
+
+ i = 0
+
+ while i < len(str):
+ m = forbidden.search(str, i)
+ if not m:
+ break
+
+ j = m.start()
+ res = res + str[i:j]
+ ch = m.group(0)
+ if ch == '"' and delim == '"""' and str[j:j+3] != '"""': #"
+ res = res + ch
+ else:
+ k = '\a\b\f\r\t\v\n\\"'.find(ch)
+ if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k]
+ else:
+ if 'e' in flags:
+# res = res + ('\\u%04x' % ord(ch))
+ res = res + ('\\u%04X' % ord(ch))
+ # http://www.w3.org/TR/rdf-testcases/#ntriples
+ else:
+ res = res + ch
+ i = j + 1
+
+ # The following code fixes things for really high range Unicode
+ newstr = ""
+ for ch in res + str[i:]:
+ if ord(ch)>65535:
+ newstr = newstr + ('\\U%08X' % ord(ch))
+ # http://www.w3.org/TR/rdf-testcases/#ntriples
+ else:
+ newstr = newstr + ch
+ #
+
+ return delim + newstr + delim
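+
+# For example (illustrative): stringToN3('hi "x"') returns the N3 literal
+# "hi \"x\"" -- short strings use plain quotes with escapes -- while a long
+# string containing newlines is emitted in the N3 long form """...""" so the
+# newlines and most quotes need no escaping.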
+
+def backslashUify(ustr):
+ """Use URL encoding to return an ASCII string corresponding
+ to the given unicode"""
+# progress("String is "+`ustr`)
+# s1=ustr.encode('utf-8')
+ s = ""
+ for ch in ustr: # .encode('utf-8'):
+ if ord(ch) > 65535:
+ ch = "\\U%08X" % ord(ch)
+ elif ord(ch) > 126:
+ ch = "\\u%04X" % ord(ch)
+ else:
+ ch = "%c" % ord(ch)
+ s = s + ch
+ return b(s)
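+
+# For example (illustrative): backslashUify(u'caf\xe9') -> 'caf\u00E9', and
+# on a wide Python build a character beyond the BMP such as u'\U0001D11E'
+# -> '\U0001D11E', matching the N-Triples \u/\U escape forms.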
+
+@py3compat.format_doctest_out
+def hexify(ustr):
+ """Use URL encoding to return an ASCII string
+ corresponding to the given UTF8 string
+
+ >>> hexify("http://example/a b")
+ %(b)s'http://example/a%%20b'
+
+ """ #"
+# progress("String is "+`ustr`)
+# s1=ustr.encode('utf-8')
+ s = ""
+ for ch in ustr: # .encode('utf-8'):
+ if ord(ch) > 126 or ord(ch) < 33 :
+ ch = "%%%02X" % ord(ch)
+ else:
+ ch = "%c" % ord(ch)
+ s = s + ch
+ return b(s)
+
+def dummy(str):
+ res = ""
+ if len(str) > 20 and (str.find("\n") >=0
+ or str.find('"') >=0):
+ delim= '"""'
+ forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now)
+ else:
+ delim = '"'
+ forbidden = "\\\"\a\b\f\r\v\t\n"
+ for i in range(len(str)):
+ ch = str[i]
+ j = forbidden.find(ch)
+ if ch == '"' and delim == '"""' \
+ and i+1 < len(str) and str[i+1] != '"':
+ j=-1 # Single quotes don't need escaping in long format
+ if j>=0: ch = "\\" + '\\"abfrvtn'[j]
+ elif ch not in "\n\t" and (ch < " " or ch > "}"):
+ ch = "[[" + `ch` + "]]" #[2:-1] # Use python
+ res = res + ch
+ return delim + res + delim
+
+
+class N3Parser(Parser):
+
+ def __init__(self):
+ pass
+
+ def parse(self, source, graph, encoding="utf-8"):
+ # we're currently being handed a Graph, not a ConjunctiveGraph
+ assert graph.store.context_aware # is this implied by formula_aware
+ assert graph.store.formula_aware
+
+ if encoding not in [None, "utf-8"]:
+ raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)
+
+ conj_graph = ConjunctiveGraph(store=graph.store)
+ conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg
+ # TODO: update N3Processor so that it can use conj_graph as the sink
+ conj_graph.namespace_manager = graph.namespace_manager
+ sink = RDFSink(conj_graph)
+
+ baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
+ p = SinkParser(sink, baseURI=baseURI)
+
+ p.loadStream(source.getByteStream())
+
+ for prefix, namespace in p._bindings.items():
+ conj_graph.bind(prefix, namespace)
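+
+# Typical use through rdflib's plugin machinery (a minimal sketch; assumes
+# this module is registered as the "n3" parser plugin, as rdflib does):
+#
+# from rdflib import Graph
+# g = Graph()
+# g.parse(data='@prefix : <http://example.org/> . :a :b :c .', format='n3')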
+
+
+
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+
+# if __name__ == '__main__':
+# _test()
+
+def main():
+ g=ConjunctiveGraph()
+
+ sink = RDFSink(g)
+ base = 'file://' + os.path.join(os.getcwd(), sys.argv[1])
+
+ p = SinkParser(sink, baseURI=base)
+ p._bindings[''] = p._baseURI + '#'
+ p.startDoc()
+
+ f = open(sys.argv[1], 'rb')
+ bytes = f.read()
+ f.close()
+
+ p.feed(bytes)
+ p.endDoc()
+ for t in g.quads((None,None,None)):
+
+ print t
+
+if __name__ == '__main__':
+ main()
+
+#ends
+