Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py')
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py2314
1 files changed, 0 insertions, 2314 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py b/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
deleted file mode 100644
index ac48340..0000000
--- a/creactistore/_templates/lib/rdflib/plugins/parsers/notation3.py
+++ /dev/null
@@ -1,2314 +0,0 @@
-#!/usr/bin/env python
-u"""
-notation3.py - Standalone Notation3 Parser
-Derived from CWM, the Closed World Machine
-
-Authors of the original suite:
-
-* Dan Connolly <@@>
-* Tim Berners-Lee <@@>
-* Yosi Scharf <@@>
-* Joseph M. Reagle Jr. <reagle@w3.org>
-* Rich Salz <rsalz@zolera.com>
-
-http://www.w3.org/2000/10/swap/notation3.py
-
-Copyright 2000-2007, World Wide Web Consortium.
-Copyright 2001, MIT.
-Copyright 2001, Zolera Systems Inc.
-
-License: W3C Software License
-http://www.w3.org/Consortium/Legal/copyright-software
-
-Modified by Sean B. Palmer
-Copyright 2007, Sean B. Palmer. \u32E1
-
-Modified to work with rdflib by Gunnar Aastrand Grimnes
-Copyright 2010, Gunnar A. Grimnes
-
-"""
-
-# Python standard libraries
-import types
-import sys
-import os
-import string
-import re
-import time
-import StringIO
-import codecs
-
-from binascii import a2b_hex
-from decimal import Decimal
-
-from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
-from rdflib.graph import QuotedGraph, ConjunctiveGraph
-from rdflib import py3compat
-b = py3compat.b
-
-__all__ = ['URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", "progress", "splitFrag", "splitFragP", "join", "refTo", "base", "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify", "dummy"]
-
-from rdflib.parser import Parser
-
-# Incestuous.. would be nice to separate N3 and XML
-# from sax2rdf import XMLtoDOM
-def XMLtoDOM(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-
-# SWAP http://www.w3.org/2000/10/swap
-# from diag import verbosity, setVerbosity, progress
-def verbosity(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-def setVerbosity(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-def progress(*args, **kargs):
- # print >> sys.stderr, args, kargs
- pass
-
-
-
-def splitFrag(uriref):
- """split a URI reference between the fragment and the rest.
-
- Punctuation is thrown away.
-
- e.g.
-
- >>> splitFrag("abc#def")
- ('abc', 'def')
-
- >>> splitFrag("abcdef")
- ('abcdef', None)
-
- """
-
- i = uriref.rfind("#")
- if i>= 0: return uriref[:i], uriref[i+1:]
- else: return uriref, None
-
-def splitFragP(uriref, punct=0):
- """split a URI reference before the fragment
-
- Punctuation is kept.
-
- e.g.
-
- >>> splitFragP("abc#def")
- ('abc', '#def')
-
- >>> splitFragP("abcdef")
- ('abcdef', '')
-
- """
-
- i = uriref.rfind("#")
- if i>= 0: return uriref[:i], uriref[i:]
- else: return uriref, ''
-
-@py3compat.format_doctest_out
-def join(here, there):
- """join an absolute URI and URI reference
- (non-ascii characters are supported/doctested;
- haven't checked the details of the IRI spec though)
-
- here is assumed to be absolute.
- there is URI reference.
-
- >>> join('http://example/x/y/z', '../abc')
- 'http://example/x/abc'
-
- Raise ValueError if there uses relative path
- syntax but here has no hierarchical path.
-
- >>> join('mid:foo@example', '../foo')
- Traceback (most recent call last):
- raise ValueError, here
- ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.
-
- >>> join('http://example/x/y/z', '')
- 'http://example/x/y/z'
-
- >>> join('mid:foo@example', '#foo')
- 'mid:foo@example#foo'
-
- We grok IRIs
-
- >>> len(u'Andr\\xe9')
- 5
-
- >>> join('http://example.org/', u'#Andr\\xe9')
- %(u)s'http://example.org/#Andr\\xe9'
- """
-
- assert(here.find("#") < 0), "Base may not contain hash: '%s'"% here # caller must splitFrag (why?)
-
- slashl = there.find('/')
- colonl = there.find(':')
-
- # join(base, 'foo:/') -- absolute
- if colonl >= 0 and (slashl < 0 or colonl < slashl):
- return there
-
- bcolonl = here.find(':')
- assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute
-
- path, frag = splitFragP(there)
- if not path: return here + frag
-
- # join('mid:foo@example', '../foo') bzzt
- if here[bcolonl+1:bcolonl+2] <> '/':
- raise ValueError ("Base <%s> has no slash after colon - with relative '%s'." %(here, there))
-
- if here[bcolonl+1:bcolonl+3] == '//':
- bpath = here.find('/', bcolonl+3)
- else:
- bpath = bcolonl+1
-
- # join('http://xyz', 'foo')
- if bpath < 0:
- bpath = len(here)
- here = here + '/'
-
- # join('http://xyz/', '//abc') => 'http://abc'
- if there[:2] == '//':
- return here[:bcolonl+1] + there
-
- # join('http://xyz/', '/abc') => 'http://xyz/abc'
- if there[:1] == '/':
- return here[:bpath] + there
-
- slashr = here.rfind('/')
-
- while 1:
- if path[:2] == './':
- path = path[2:]
- if path == '.':
- path = ''
- elif path[:3] == '../' or path == '..':
- path = path[3:]
- i = here.rfind('/', bpath, slashr)
- if i >= 0:
- here = here[:i+1]
- slashr = i
- else:
- break
-
- return here[:slashr+1] + path + frag
-
-commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')
-
-def refTo(base, uri):
- """figure out a relative URI reference from base to uri
-
- >>> refTo('http://example/x/y/z', 'http://example/x/abc')
- '../abc'
-
- >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
- 'q/r#s'
-
- >>> refTo(None, 'http://ex/x/y')
- 'http://ex/x/y'
-
- >>> refTo('http://ex/x/y', 'http://ex/x/y')
- ''
-
- Note the relationship between refTo and join:
- join(x, refTo(x, y)) == y
- which points out certain strings which cannot be URIs. e.g.
- >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
- 0
-
- So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:
- >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
- 1
-
- This one checks that it uses a root-realtive one where that is
- all they share. Now uses root-relative where no path is shared.
- This is a matter of taste but tends to give more resilience IMHO
- -- and shorter paths
-
- Note that base may be None, meaning no base. In some situations, there
- just ain't a base. Slife. In these cases, relTo returns the absolute value.
- The axiom abs(,rel(b,x))=x still holds.
- This saves people having to set the base to "bogus:".
-
- >>> refTo('http://ex/x/y/z', 'http://ex/r')
- '/r'
-
- """
-
-# assert base # don't mask bugs -danc # not a bug. -tim
- if not base: return uri
- if base == uri: return ""
-
- # Find how many path segments in common
- i=0
- while i<len(uri) and i<len(base):
- if uri[i] == base[i]: i = i + 1
- else: break
- # print "# relative", base, uri, " same up to ", i
- # i point to end of shortest one or first difference
-
- m = commonHost.match(base[:i])
- if m:
- k=uri.find("//")
- if k<0: k=-2 # no host
- l=uri.find("/", k+2)
- if uri[l+1:l+2] != "/" and base[l+1:l+2] != "/" and uri[:l]==base[:l]:
- return uri[l:]
-
- if uri[i:i+1] =="#" and len(base) == i: return uri[i:] # fragment of base
-
- while i>0 and uri[i-1] != '/' : i=i-1 # scan for slash
-
- if i < 3: return uri # No way.
- if base.find("//", i-2)>0 \
- or uri.find("//", i-2)>0: return uri # An unshared "//"
- if base.find(":", i)>0: return uri # An unshared ":"
- n = base.count("/", i)
- if n == 0 and i<len(uri) and uri[i] == '#':
- return "./" + uri[i:]
- elif n == 0 and i == len(uri):
- return "./"
- else:
- return ("../" * n) + uri[i:]
-
-
-def base():
- """The base URI for this process - the Web equiv of cwd
-
- Relative or abolute unix-standard filenames parsed relative to
- this yeild the URI of the file.
- If we had a reliable way of getting a computer name,
- we should put it in the hostname just to prevent ambiguity
-
- """
-# return "file://" + hostname + os.getcwd() + "/"
- return "file://" + _fixslash(os.getcwd()) + "/"
-
-
-def _fixslash(str):
- """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)"""
- s = str
- for i in range(len(s)):
- if s[i] == "\\": s = s[:i] + "/" + s[i+1:]
- if s[0] != "/" and s[1] == ":": s = s[2:] # @@@ Hack when drive letter present
- return s
-
-URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
- # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-
-@py3compat.format_doctest_out
-def canonical(str_in):
- """Convert equivalent URIs (or parts) to the same string
-
- There are many differenet levels of URI canonicalization
- which are possible. See http://www.ietf.org/rfc/rfc3986.txt
- Done:
- - Converfting unicode IRI to utf-8
- - Escaping all non-ASCII
- - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
- hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
- - Making all escapes uppercase hexadecimal
-
- Not done:
- - Making URI scheme lowercase
- - changing /./ or /foo/../ to / with care not to change host part
-
-
- >>> canonical("foo bar")
- %(b)s'foo%%20bar'
-
- >>> canonical(u'http:')
- %(b)s'http:'
-
- >>> canonical('fran%%c3%%83%%c2%%a7ois')
- %(b)s'fran%%C3%%83%%C2%%A7ois'
-
- >>> canonical('a')
- %(b)s'a'
-
- >>> canonical('%%4e')
- %(b)s'N'
-
- >>> canonical('%%9d')
- %(b)s'%%9D'
-
- >>> canonical('%%2f')
- %(b)s'%%2F'
-
- >>> canonical('%%2F')
- %(b)s'%%2F'
-
- """
- if type(str_in) == type(u''):
- s8 = str_in.encode('utf-8')
- else:
- s8 = str_in
- s = b('')
- i = 0
- while i < len(s8):
- if py3compat.PY3:
- n = s8[i]; ch = bytes([n])
- else:
- ch = s8[i]; n = ord(ch)
- if (n > 126) or (n < 33) : # %-encode controls, SP, DEL, and utf-8
- s += b("%%%02X" % ord(ch))
- elif ch == b('%') and i+2 < len(s8):
- ch2 = a2b_hex(s8[i+1:i+3])
- if ch2 in URI_unreserved: s += ch2
- else: s += b("%%%02X" % ord(ch2))
- i = i+3
- continue
- else:
- s += ch
- i = i +1
- return s
-
-
-
-
-
-
-CONTEXT = 0
-PRED = 1
-SUBJ = 2
-OBJ = 3
-
-PARTS = PRED, SUBJ, OBJ
-ALL4 = CONTEXT, PRED, SUBJ, OBJ
-
-SYMBOL = 0
-FORMULA = 1
-LITERAL = 2
-LITERAL_DT = 21
-LITERAL_LANG = 22
-ANONYMOUS = 3
-XMLLITERAL = 25
-
-Logic_NS = "http://www.w3.org/2000/10/swap/log#"
-NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging
-forSomeSym = Logic_NS + "forSome"
-forAllSym = Logic_NS + "forAll"
-
-RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
-RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-OWL_NS = "http://www.w3.org/2002/07/owl#"
-DAML_sameAs_URI = OWL_NS+"sameAs"
-parsesTo_URI = Logic_NS + "parsesTo"
-RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"
-
-List_NS = RDF_NS_URI # From 20030808
-_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"
-
-N3_first = (SYMBOL, List_NS + "first")
-N3_rest = (SYMBOL, List_NS + "rest")
-N3_li = (SYMBOL, List_NS + "li")
-N3_nil = (SYMBOL, List_NS + "nil")
-N3_List = (SYMBOL, List_NS + "List")
-N3_Empty = (SYMBOL, List_NS + "Empty")
-
-
-
-runNamespaceValue = None
-
-def runNamespace():
- "Return a URI suitable as a namespace for run-local objects"
- # @@@ include hostname (privacy?) (hash it?)
- global runNamespaceValue
- if runNamespaceValue == None:
- runNamespaceValue = join(base(), _unique_id()) + '#'
- return runNamespaceValue
-
-nextu = 0
-def uniqueURI():
- "A unique URI"
- global nextu
- nextu += 1
- return runNamespace() + "u_" + `nextu`
-
-class URISyntaxError(ValueError):
- """A parameter is passed to a routine that requires a URI reference"""
- pass
-
-
-tracking = False
-chatty_flag = 50
-
-
-from xml.dom import Node
-try:
- from xml.ns import XMLNS
-except:
- class XMLNS:
- BASE = "http://www.w3.org/2000/xmlns/"
- XML = "http://www.w3.org/XML/1998/namespace"
-
-
-_attrs = lambda E: (E.attributes and E.attributes.values()) or []
-_children = lambda E: E.childNodes or []
-_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
-_inclusive = lambda n: n.unsuppressedPrefixes == None
-
-# Does a document/PI has lesser/greater document order than the
-# first element?
-_LesserElement, _Element, _GreaterElement = range(3)
-
-def _sorter(n1,n2):
- '''_sorter(n1,n2) -> int
- Sorting predicate for non-NS attributes.'''
-
- i = cmp(n1.namespaceURI, n2.namespaceURI)
- if i: return i
- return cmp(n1.localName, n2.localName)
-
-
-def _sorter_ns(n1,n2):
- '''_sorter_ns((n,v),(n,v)) -> int
- "(an empty namespace URI is lexicographically least)."'''
-
- if n1[0] == 'xmlns': return -1
- if n2[0] == 'xmlns': return 1
- return cmp(n1[0], n2[0])
-
-def _utilized(n, node, other_attrs, unsuppressedPrefixes):
- '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
- Return true if that nodespace is utilized within the node'''
-
- if n.startswith('xmlns:'):
- n = n[6:]
- elif n.startswith('xmlns'):
- n = n[5:]
- if (n=="" and node.prefix in ["#default", None]) or \
- n == node.prefix or n in unsuppressedPrefixes:
- return 1
- for attr in other_attrs:
- if n == attr.prefix: return 1
- return 0
-
-#_in_subset = lambda subset, node: not subset or node in subset
-_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak
-
-class _implementation:
- '''Implementation class for C14N. This accompanies a node during it's
- processing and includes the parameters and processing state.'''
-
- # Handler for each node type; populated during module instantiation.
- handlers = {}
-
- def __init__(self, node, write, **kw):
- '''Create and run the implementation.'''
- self.write = write
- self.subset = kw.get('subset')
- self.comments = kw.get('comments', 0)
- self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
- nsdict = kw.get('nsdict', { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE })
-
- # Processing state.
- self.state = (nsdict, {'xml':''}, {}) #0422
-
- if node.nodeType == Node.DOCUMENT_NODE:
- self._do_document(node)
- elif node.nodeType == Node.ELEMENT_NODE:
- self.documentOrder = _Element # At document element
- if not _inclusive(self):
- self._do_element(node)
- else:
- inherited = self._inherit_context(node)
- self._do_element(node, inherited)
- elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
- pass
- elif node.nodeType == Node.TEXT_NODE:
- self._do_text(node)
- else:
- raise TypeError, str(node)
-
-
- def _inherit_context(self, node):
- '''_inherit_context(self, node) -> list
- Scan ancestors of attribute and namespace context. Used only
- for single element node canonicalization, not for subset
- canonicalization.'''
-
- # Collect the initial list of xml:foo attributes.
- xmlattrs = filter(_IN_XML_NS, _attrs(node))
-
- # Walk up and get all xml:XXX attributes we inherit.
- inherited, parent = [], node.parentNode
- while parent and parent.nodeType == Node.ELEMENT_NODE:
- for a in filter(_IN_XML_NS, _attrs(parent)):
- n = a.localName
- if n not in xmlattrs:
- xmlattrs.append(n)
- inherited.append(a)
- parent = parent.parentNode
- return inherited
-
-
- def _do_document(self, node):
- '''_do_document(self, node) -> None
- Process a document node. documentOrder holds whether the document
- element has been encountered such that PIs/comments can be written
- as specified.'''
-
- self.documentOrder = _LesserElement
- for child in node.childNodes:
- if child.nodeType == Node.ELEMENT_NODE:
- self.documentOrder = _Element # At document element
- self._do_element(child)
- self.documentOrder = _GreaterElement # After document element
- elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
- self._do_pi(child)
- elif child.nodeType == Node.COMMENT_NODE:
- self._do_comment(child)
- elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
- pass
- else:
- raise TypeError, str(child)
- handlers[Node.DOCUMENT_NODE] = _do_document
-
-
- def _do_text(self, node):
- '''_do_text(self, node) -> None
- Process a text or CDATA node. Render various special characters
- as their C14N entity representations.'''
- if not _in_subset(self.subset, node): return
- s = node.data.replace("&", "&amp;")
- s = s.replace("<", "&lt;")
- s = s.replace(">", "&gt;")
- s = s.replace("\015", "&#xD;")
- if s: self.write(s)
- handlers[Node.TEXT_NODE] = _do_text
- handlers[Node.CDATA_SECTION_NODE] = _do_text
-
-
- def _do_pi(self, node):
- '''_do_pi(self, node) -> None
- Process a PI node. Render a leading or trailing #xA if the
- document order of the PI is greater or lesser (respectively)
- than the document element.
- '''
- if not _in_subset(self.subset, node): return
- W = self.write
- if self.documentOrder == _GreaterElement: W('\n')
- W('<?')
- W(node.nodeName)
- s = node.data
- if s:
- W(' ')
- W(s)
- W('?>')
- if self.documentOrder == _LesserElement: W('\n')
- handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
-
-
- def _do_comment(self, node):
- '''_do_comment(self, node) -> None
- Process a comment node. Render a leading or trailing #xA if the
- document order of the comment is greater or lesser (respectively)
- than the document element.
- '''
- if not _in_subset(self.subset, node): return
- if self.comments:
- W = self.write
- if self.documentOrder == _GreaterElement: W('\n')
- W('<!--')
- W(node.data)
- W('-->')
- if self.documentOrder == _LesserElement: W('\n')
- handlers[Node.COMMENT_NODE] = _do_comment
-
-
- def _do_attr(self, n, value):
- ''''_do_attr(self, node) -> None
- Process an attribute.'''
-
- W = self.write
- W(' ')
- W(n)
- W('="')
- s = value.replace(value, "&", "&amp;")
- s = s.replace("<", "&lt;")
- s = s.replace('"', '&quot;')
- s = s.replace('\011', '&#x9')
- s = s.replace('\012', '&#xA')
- s = s.replace('\015', '&#xD')
- W(s)
- W('"')
-
-
- def _do_element(self, node, initial_other_attrs = []):
- '''_do_element(self, node, initial_other_attrs = []) -> None
- Process an element (and its children).'''
-
- # Get state (from the stack) make local copies.
- # ns_parent -- NS declarations in parent
- # ns_rendered -- NS nodes rendered by ancestors
- # ns_local -- NS declarations relevant to this element
- # xml_attrs -- Attributes in XML namespace from parent
- # xml_attrs_local -- Local attributes in XML namespace.
- ns_parent, ns_rendered, xml_attrs = \
- self.state[0], self.state[1].copy(), self.state[2].copy() #0422
- ns_local = ns_parent.copy()
- xml_attrs_local = {}
-
- # progress("_do_element node.nodeName=", node.nodeName)
- # progress("_do_element node.namespaceURI", node.namespaceURI)
- # progress("_do_element node.tocml()", node.toxml())
- # Divide attributes into NS, XML, and others.
- other_attrs = initial_other_attrs[:]
- in_subset = _in_subset(self.subset, node)
- for a in _attrs(node):
- # progress("\t_do_element a.nodeName=", a.nodeName)
- if a.namespaceURI == XMLNS.BASE:
- n = a.nodeName
- if n == "xmlns:": n = "xmlns" # DOM bug workaround
- ns_local[n] = a.nodeValue
- elif a.namespaceURI == XMLNS.XML:
- if _inclusive(self) or in_subset:
- xml_attrs_local[a.nodeName] = a #0426
- else:
- other_attrs.append(a)
- #add local xml:foo attributes to ancestor's xml:foo attributes
- xml_attrs.update(xml_attrs_local)
-
- # Render the node
- W, name = self.write, None
- if in_subset:
- name = node.nodeName
- W('<')
- W(name)
-
- # Create list of NS attributes to render.
- ns_to_render = []
- for n,v in ns_local.items():
-
- # If default namespace is XMLNS.BASE or empty,
- # and if an ancestor was the same
- if n == "xmlns" and v in [ XMLNS.BASE, '' ] \
- and ns_rendered.get('xmlns') in [ XMLNS.BASE, '', None ]:
- continue
-
- # "omit namespace node with local name xml, which defines
- # the xml prefix, if its string value is
- # http://www.w3.org/XML/1998/namespace."
- if n in ["xmlns:xml", "xml"] \
- and v in [ 'http://www.w3.org/XML/1998/namespace' ]:
- continue
-
-
- # If not previously rendered
- # and it's inclusive or utilized
- if (n,v) not in ns_rendered.items() \
- and (_inclusive(self) or \
- _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
- ns_to_render.append((n, v))
-
- # Sort and render the ns, marking what was rendered.
- ns_to_render.sort(_sorter_ns)
- for n,v in ns_to_render:
- self._do_attr(n, v)
- ns_rendered[n]=v #0417
-
- # If exclusive or the parent is in the subset, add the local xml attributes
- # Else, add all local and ancestor xml attributes
- # Sort and render the attributes.
- if not _inclusive(self) or _in_subset(self.subset,node.parentNode): #0426
- other_attrs.extend(xml_attrs_local.values())
- else:
- other_attrs.extend(xml_attrs.values())
- other_attrs.sort(_sorter)
- for a in other_attrs:
- self._do_attr(a.nodeName, a.value)
- W('>')
-
- # Push state, recurse, pop state.
- state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
- for c in _children(node):
- _implementation.handlers[c.nodeType](self, c)
- self.state = state
-
- if name: W('</%s>' % name)
- handlers[Node.ELEMENT_NODE] = _do_element
-
-
-def Canonicalize(node, output=None, **kw):
- '''Canonicalize(node, output=None, **kw) -> UTF-8
-
- Canonicalize a DOM document/element node and all descendents.
- Return the text; if output is specified then output.write will
- be called to output the text and None will be returned
- Keyword parameters:
- nsdict -- a dictionary of prefix:uri namespace entries
- assumed to exist in the surrounding context
- comments -- keep comments if non-zero (default is 0)
- subset -- Canonical XML subsetting resulting from XPath (default is [])
- unsuppressedPrefixes -- do exclusive C14N, and this specifies the
- prefixes that should be inherited.
- '''
- if output:
- apply(_implementation, (node, output.write), kw)
- else:
- s = StringIO.StringIO()
- apply(_implementation, (node, s.write), kw)
- return s.getvalue()
-
-# end of xmlC14n.py
-
-# from why import BecauseOfData, becauseSubexpression
-def BecauseOfData(*args, **kargs):
- # print args, kargs
- pass
-def becauseSubexpression(*args, **kargs):
- # print args, kargs
- pass
-
-N3_forSome_URI = forSomeSym
-N3_forAll_URI = forAllSym
-
-# Magic resources we know about
-
-
-
-ADDED_HASH = "#" # Stop where we use this in case we want to remove it!
-# This is the hash on namespace URIs
-
-RDF_type = ( SYMBOL , RDF_type_URI )
-DAML_sameAs = ( SYMBOL, DAML_sameAs_URI )
-
-LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"
-
-BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
-DECIMAL_DATATYPE = _XSD_PFX + "decimal"
-DOUBLE_DATATYPE = _XSD_PFX + "double"
-FLOAT_DATATYPE = _XSD_PFX + "float"
-INTEGER_DATATYPE = _XSD_PFX + "integer"
-
-option_noregen = 0 # If set, do not regenerate genids on output
-
-# @@ I18n - the notname chars need extending for well known unicode non-text
-# characters. The XML spec switched to assuming unknown things were name
-# characaters.
-# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
-_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/
-_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/
-_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
-
-
-N3CommentCharacter = "#" # For unix script #! compatabilty
-
-########################################## Parse string to sink
-#
-# Regular expressions:
-eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment
-eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. w/comment
-ws = re.compile(r'[ \t]*') # Whitespace not including NL
-signed_integer = re.compile(r'[-+]?[0-9]+') # integer
-number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>e[-+]?[0-9]+)?')
-digitstring = re.compile(r'[0-9]+') # Unsigned integer
-interesting = re.compile(r'[\\\r\n\"]')
-langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')
-#"
-
-
-
-class SinkParser:
- def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
- genPrefix = "", flags="",
- why=None):
- """ note: namespace names should *not* end in #;
- the # will get added during qname processing """
-
- self._bindings = {}
- self._flags = flags
- if thisDoc != "":
- assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
- self._bindings[""] = thisDoc + "#" # default
-
- self._store = store
- if genPrefix: store.setGenPrefix(genPrefix) # pass it on
-
- self._thisDoc = thisDoc
- self.lines = 0 # for error handling
- self.startOfLine = 0 # For calculating character number
- self._genPrefix = genPrefix
- self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false' ]
- self.keywordsSet = 0 # Then only can others be considerd qnames
- self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term
- self._variables = {}
- self._parentVariables = {}
- self._reason = why # Why the parser was asked to parse this
-
- self._reason2 = None # Why these triples
- # was: diag.tracking
- if tracking: self._reason2 = BecauseOfData(
- store.newSymbol(thisDoc), because=self._reason)
-
- if baseURI: self._baseURI = baseURI
- else:
- if thisDoc:
- self._baseURI = thisDoc
- else:
- self._baseURI = None
-
- assert not self._baseURI or ':' in self._baseURI
-
- if not self._genPrefix:
- if self._thisDoc: self._genPrefix = self._thisDoc + "#_g"
- else: self._genPrefix = uniqueURI()
-
- if openFormula ==None:
- if self._thisDoc:
- self._formula = store.newFormula(thisDoc + "#_formula")
- else:
- self._formula = store.newFormula()
- else:
- self._formula = openFormula
-
-
- self._context = self._formula
- self._parentContext = None
-
-
- def here(self, i):
- """String generated from position in file
-
- This is for repeatability when refering people to bnodes in a document.
- This has diagnostic uses less formally, as it should point one to which
- bnode the arbitrary identifier actually is. It gives the
- line and character number of the '[' charcacter or path character
- which introduced the blank node. The first blank node is boringly _L1C1.
- It used to be used only for tracking, but for tests in general
- it makes the canonical ordering of bnodes repeatable."""
-
- return "%s_L%iC%i" % (self._genPrefix , self.lines,
- i - self.startOfLine + 1)
-
- def formula(self):
- return self._formula
-
- def loadStream(self, stream):
- return self.loadBuf(stream.read()) # Not ideal
-
- def loadBuf(self, buf):
- """Parses a buffer and returns its top level formula"""
- self.startDoc()
-
- self.feed(buf)
- return self.endDoc() # self._formula
-
-
- def feed(self, octets):
- """Feed an octet stream tothe parser
-
- if BadSyntax is raised, the string
- passed in the exception object is the
- remainder after any statements have been parsed.
- So if there is more data to feed to the
- parser, it should be straightforward to recover."""
-
- if not isinstance(octets, unicode):
- s = octets.decode('utf-8')
- # NB already decoded, so \ufeff
- if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
- s = s[1:]
- else:
- s=octets
-
- i = 0
- while i >= 0:
- j = self.skipSpace(s, i)
- if j<0: return
-
- i = self.directiveOrStatement(s,j)
- if i<0:
- print "# next char: ", `s[j]`
- raise BadSyntax(self._thisDoc, self.lines, s, j,
- "expected directive or statement")
-
- def directiveOrStatement(self, str,h):
-
- i = self.skipSpace(str, h)
- if i<0: return i # EOF
-
- j = self.directive(str, i)
- if j>=0: return self.checkDot(str,j)
-
- j = self.statement(str, i)
- if j>=0: return self.checkDot(str,j)
-
- return j
-
-
- #@@I18N
- global _notNameChars
- #_namechars = string.lowercase + string.uppercase + string.digits + '_-'
-
- def tok(self, tok, str, i):
- """Check for keyword. Space must have been stripped on entry and
- we must not be at end of file."""
-
- assert tok[0] not in _notNameChars # not for punctuation
- if str[i:i+1] == "@":
- i = i+1
- else:
- if tok not in self.keywords:
- return -1 # No, this has neither keywords declaration nor "@"
-
- if (str[i:i+len(tok)] == tok
- and (str[i+len(tok)] in _notQNameChars )):
- i = i + len(tok)
- return i
- else:
- return -1
-
- def directive(self, str, i):
- j = self.skipSpace(str, i)
- if j<0: return j # eof
- res = []
-
- j = self.tok('bind', str, i) # implied "#". Obsolete.
- if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i,
- "keyword bind is obsolete: use @prefix")
-
- j = self.tok('keywords', str, i)
- if j>0:
- i = self.commaSeparatedList(str, j, res, self.bareWord)
- if i < 0:
- raise BadSyntax(self._thisDoc, self.lines, str, i,
- "'@keywords' needs comma separated list of words")
- self.setKeywords(res[:])
- # was: diag.chatty_flag
- if chatty_flag > 80: progress("Keywords ", self.keywords)
- return i
-
-
- j = self.tok('forAll', str, i)
- if j > 0:
- i = self.commaSeparatedList(str, j, res, self.uri_ref2)
- if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
- "Bad variable list after @forAll")
- for x in res:
- #self._context.declareUniversal(x)
- if x not in self._variables or x in self._parentVariables:
- self._variables[x] = self._context.newUniversal(x)
- return i
-
- j = self.tok('forSome', str, i)
- if j > 0:
- i = self. commaSeparatedList(str, j, res, self.uri_ref2)
- if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
- "Bad variable list after @forSome")
- for x in res:
- self._context.declareExistential(x)
- return i
-
-
- j=self.tok('prefix', str, i) # no implied "#"
- if j>=0:
- t = []
- i = self.qname(str, j, t)
- if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
- "expected qname after @prefix")
- j = self.uri_ref2(str, i, t)
- if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
- "expected <uriref> after @prefix _qname_")
- ns = self.uriOf(t[1])
-
- if self._baseURI:
- ns = join(self._baseURI, ns)
- elif ":" not in ns:
- raise BadSyntax(self._thisDoc, self.lines, str, j,
- "With no base URI, cannot use relative URI in @prefix <"+ns+">")
- assert ':' in ns # must be absolute
- self._bindings[t[0][0]] = ns
- self.bind(t[0][0], hexify(ns))
- return j
-
- j=self.tok('base', str, i) # Added 2007/7/7
- if j >= 0:
- t = []
- i = self.uri_ref2(str, j, t)
- if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
- "expected <uri> after @base ")
- ns = self.uriOf(t[0])
-
- if self._baseURI:
- ns = join(self._baseURI, ns)
- else:
- raise BadSyntax(self._thisDoc, self.lines, str, j,
- "With no previous base URI, cannot use relative URI in @base <"+ns+">")
- assert ':' in ns # must be absolute
- self._baseURI = ns
- return i
-
- return -1 # Not a directive, could be something else.
-
- def bind(self, qn, uri):
- assert isinstance(uri,
- types.StringType), "Any unicode must be %x-encoded already"
- if qn == "":
- self._store.setDefaultNamespace(uri)
- else:
- self._store.bind(qn, uri)
-
- def setKeywords(self, k):
- "Takes a list of strings"
- if k == None:
- self.keywordsSet = 0
- else:
- self.keywords = k
- self.keywordsSet = 1
-
-
- def startDoc(self):
- # was: self._store.startDoc()
- self._store.startDoc(self._formula)
-
- def endDoc(self):
- """Signal end of document and stop parsing. returns formula"""
- self._store.endDoc(self._formula) # don't canonicalize yet
- return self._formula
-
- def makeStatement(self, quadruple):
- #$$$$$$$$$$$$$$$$$$$$$
-# print "# Parser output: ", `quadruple`
- self._store.makeStatement(quadruple, why=self._reason2)
-
-
-
- def statement(self, str, i):
- r = []
-
- i = self.object(str, i, r) # Allow literal for subject - extends RDF
- if i<0: return i
-
- j = self.property_list(str, i, r[0])
-
- if j<0: raise BadSyntax(self._thisDoc, self.lines,
- str, i, "expected propertylist")
- return j
-
- def subject(self, str, i, res):
- return self.item(str, i, res)
-
def verb(self, str, i, res):
    """ has _prop_
    is _prop_ of
    a
    =
    _prop_
    >- prop ->
    <- prop -<
    _operator_

    Appends one ('->' | '<-', property) pair to *res*; '<-' marks an
    inverted (is ... of) property.  Returns the new position or a
    negative value when no verb is present.
    """
    j = self.skipSpace(str, i)
    if j<0:return j # eof

    r = []

    j = self.tok('has', str, i)
    if j>=0:
        i = self.prop(str, j, r)
        if i < 0: raise BadSyntax(self._thisDoc, self.lines,
                        str, j, "expected property after 'has'")
        res.append(('->', r[0]))
        return i

    j = self.tok('is', str, i)
    if j>=0:
        i = self.prop(str, j, r)
        if i < 0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                        "expected <property> after 'is'")
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "End of file found, expected property after 'is'")
            return j # eof  # NOTE(review): unreachable after raise
        i=j
        j = self.tok('of', str, i)
        if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "expected 'of' after 'is' <prop>")
        res.append(('<-', r[0]))
        return j

    j = self.tok('a', str, i)
    if j>=0:
        res.append(('->', RDF_type))
        return j

    if str[i:i+2] == "<=":
        res.append(('<-', self._store.newSymbol(Logic_NS+"implies")))
        return i+2

    if str[i:i+1] == "=":
        if str[i+1:i+2] == ">":
            res.append(('->', self._store.newSymbol(Logic_NS+"implies")))
            return i+2
        res.append(('->', DAML_sameAs))
        return i+1

    if str[i:i+2] == ":=":
        # patch file relates two formulae, uses this @@ really?
        res.append(('->', Logic_NS+"becomes"))
        return i+2

    j = self.prop(str, i, r)
    if j >= 0:
        res.append(('->', r[0]))
        return j

    if str[i:i+2] == ">-" or str[i:i+2] == "<-":
        # NOTE(review): j is negative at this point, so BadSyntax reports a
        # wrong error position; `i` was probably intended here.
        raise BadSyntax(self._thisDoc, self.lines, str, j,
                                    ">- ... -> syntax is obsolete.")

    return -1
-
def prop(self, str, i, res):
    # A property is any item (path production).
    return self.item(str, i, res)
-
def item(self, str, i, res):
    # An item is a path: a node possibly followed by !/^ traversal steps.
    return self.path(str, i, res)
-
def blankNode(self, uri=None):
    """Create a blank node in the current context.

    With the "B" flag set, a named symbol declared as an existential
    is used instead of a true blank node.
    """
    if "B" not in self._flags:
        return self._context.newBlankNode(uri, why=self._reason2)
    x = self._context.newSymbol(uri)
    self._context.declareExistential(x)
    return x
-
def path(self, str, i, res):
    """Parse the path production: a node followed by zero or more
    "!", "^" or "." steps; each step pops the previous result, makes a
    fresh blank node and emits a statement linking them ("^" reverses
    subject and object).
    """
    j = self.nodeOrLiteral(str, i, res)
    if j<0: return j  # nope

    while str[j:j+1] in "!^.":  # no spaces, must follow exactly (?)
        ch = str[j:j+1]         # @@ Allow "." followed IMMEDIATELY by a node.
        if ch == ".":
            ahead = str[j+1:j+2]
            if not ahead or (ahead in _notNameChars
                        and ahead not in ":?<[{("): break
        subj = res.pop()
        obj = self.blankNode(uri=self.here(j))
        j = self.node(str, j+1, res)
        if j<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                    "EOF found in middle of path syntax")
        pred = res.pop()
        if ch == "^":  # Reverse traverse
            self.makeStatement((self._context, pred, obj, subj))
        else:
            self.makeStatement((self._context, pred, subj, obj))
        res.append(obj)
    return j
-
def anonymousNode(self, ln):
    """Remember or generate a term for one of these _: anonymous nodes.

    :param ln: local name following the "_:" prefix.
    :return: the blank-node term; cached per document so repeated uses
        of the same label yield the same node.
    """
    term = self._anonymousNodes.get(ln)
    if term is not None:  # identity test; `!= None` would invoke __ne__
        return term
    term = self._store.newBlankNode(self._context, why=self._reason2)
    self._anonymousNodes[ln] = term
    return term
-
def node(self, str, i, res, subjectAlready=None):
    """Parse the <node> production.
    Space is now skipped once at the beginning
    instead of in multipe calls to self.skipSpace().

    Handles, in order: [ ... ] bnode with property list, {$ ... $}
    set, { ... } quoted formula, ( ... ) list / ($ ... $) set, the
    obsolete 'this' keyword, the booleans true/false, and finally a
    named node.  Appends the parsed term to *res*; returns the new
    position or -1 when nothing matched.
    """
    subj = subjectAlready

    j = self.skipSpace(str,i)
    if j<0: return j #eof
    i=j
    ch = str[i:i+1]  # Quick 1-character checks first:

    # --- [ ... ]  blank node with optional "= objectList" binding hack
    if ch == "[":
        bnodeID = self.here(i)
        j=self.skipSpace(str,i+1)
        if j<0: raise BadSyntax(self._thisDoc,
                            self.lines, str, i, "EOF after '['")
        if str[j:j+1] == "=":  # Hack for "is" binding name to anon node
            i = j+1
            objs = []
            j = self.objectList(str, i, objs);
            if j>=0:
                subj = objs[0]
                if len(objs)>1:
                    for obj in objs:
                        self.makeStatement((self._context,
                                            DAML_sameAs, subj, obj))
                j = self.skipSpace(str, j)
                if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                    "EOF when objectList expected after [ = ")
                if str[j:j+1] == ";":
                    j=j+1
            else:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "objectList expected after [= ")

        if subj is None:
            subj=self.blankNode(uri= bnodeID)

        i = self.property_list(str, j, subj)
        if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "property_list expected")

        j = self.skipSpace(str, i)
        if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                "EOF when ']' expected after [ <propertyList>")
        if str[j:j+1] != "]":
            raise BadSyntax(self._thisDoc,
                                self.lines, str, j, "']' expected")
        res.append(subj)
        return j+1

    # --- {$ ... $} set, or { ... } quoted formula
    if ch == "{":
        ch2 = str[i+1:i+2]
        if ch2 == '$':
            i += 1
            j = i + 1
            List = []
            first_run = True
            while 1:
                i = self.skipSpace(str, j)
                if i<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                        "needed '$}', found end.")
                if str[i:i+2] == '$}':
                    j = i+2
                    break

                if not first_run:
                    if str[i:i+1] == ',':
                        i+=1
                    else:
                        raise BadSyntax(self._thisDoc, self.lines,
                                        str, i, "expected: ','")
                else: first_run = False

                item = []
                j = self.item(str,i, item)  #@@@@@ should be path, was object
                if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                        "expected item in set or '$}'")
                List.append(self._store.intern(item[0]))
            res.append(self._store.newSet(List, self._context))
            return j
        else:
            # Parse the formula in a fresh context; parser state
            # (variables, anonymous nodes, contexts) is saved here and
            # restored after the closing "}".
            j=i+1
            oldParentContext = self._parentContext
            self._parentContext = self._context
            parentAnonymousNodes = self._anonymousNodes
            grandParentVariables = self._parentVariables
            self._parentVariables = self._variables
            self._anonymousNodes = {}
            self._variables = self._variables.copy()
            reason2 = self._reason2
            self._reason2 = becauseSubexpression
            if subj is None: subj = self._store.newFormula()
            self._context = subj

            while 1:
                i = self.skipSpace(str, j)
                if i<0: raise BadSyntax(self._thisDoc, self.lines,
                                        str, i, "needed '}', found end.")

                if str[i:i+1] == "}":
                    j = i+1
                    break

                j = self.directiveOrStatement(str,i)
                if j<0: raise BadSyntax(self._thisDoc, self.lines,
                                        str, i, "expected statement or '}'")

            # restore the parser state saved above
            self._anonymousNodes = parentAnonymousNodes
            self._variables = self._parentVariables
            self._parentVariables = grandParentVariables
            self._context = self._parentContext
            self._reason2 = reason2
            self._parentContext = oldParentContext
            res.append(subj.close())  # No use until closed
            return j

    # --- ( ... ) list, or ($ ... $) set
    if ch == "(":
        thing_type = self._store.newList
        ch2 = str[i+1:i+2]
        if ch2 == '$':
            thing_type = self._store.newSet
            i += 1
        j=i+1

        List = []
        while 1:
            i = self.skipSpace(str, j)
            if i<0: raise BadSyntax(self._thisDoc, self.lines,
                                    str, i, "needed ')', found end.")
            if str[i:i+1] == ')':
                j = i+1
                break

            item = []
            j = self.item(str,i, item)  #@@@@@ should be path, was object
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "expected item in list or ')'")
            List.append(self._store.intern(item[0]))
        res.append(thing_type(List, self._context))
        return j

    j = self.tok('this', str, i)  # This context
    if j>=0:
        raise BadSyntax(self._thisDoc, self.lines, str, i,
            "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
        # NOTE(review): the two lines below are unreachable after the raise.
        res.append(self._context)
        return j

    #booleans
    j = self.tok('true', str, i)
    if j>=0:
        res.append(True)
        return j
    j = self.tok('false', str, i)
    if j>=0:
        res.append(False)
        return j

    if subj is None:  # If this can be a named node, then check for a name.
        j = self.uri_ref2(str, i, res)
        if j >= 0:
            return j

    return -1
-
def property_list(self, str, i, subj):
    """Parse property list
    Leaves the terminating punctuation in the buffer

    Emits one statement per (verb, object) pair found; ';' continues
    the list, anything else ends it.  ":-" attaches a {} / () / []
    node to *subj*.
    """
    while 1:
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "EOF found when expected verb in property list")
            return j #eof  # NOTE(review): unreachable after raise

        if str[j:j+2] ==":-":
            i = j + 2
            res = []
            j = self.node(str, i, res, subj)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    "bad {} or () or [] node after :- ")
            i=j
            continue
        i=j
        v = []
        j = self.verb(str, i, v)
        if j<=0:
            return i # void but valid

        objs = []
        i = self.objectList(str, j, objs)
        if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "objectList expected")
        for obj in objs:
            dir, sym = v[0]
            # '->' is a normal property, '<-' an inverted (is...of) one
            if dir == '->':
                self.makeStatement((self._context, sym, subj, obj))
            else:
                self.makeStatement((self._context, sym, obj, subj))

        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "EOF found in list of objects")
            return j #eof  # NOTE(review): unreachable after raise
        if str[i:i+1] != ";":
            return i
        i = i+1 # skip semicolon and continue
-
def commaSeparatedList(self, str, j, res, what):
    """return value: -1 bad syntax; >1 new position in str
    res has things found appended

    *what* is the parser method used for each element; the list ends
    at "." which is left unconsumed.
    """
    i = self.skipSpace(str, j)
    if i<0:
        raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "EOF found expecting comma sep list")
        return i  # NOTE(review): unreachable after raise
    if str[i] == ".": return j  # empty list is OK
    i = what(str, i, res)
    if i<0: return -1

    while 1:
        j = self.skipSpace(str, i)
        if j<0: return j # eof
        ch = str[j:j+1]
        if ch != ",":
            if ch != ".":
                return -1
            return j  # Found but not swallowed "."
        i = what(str, j+1, res)
        if i<0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "bad list content")
            return i  # NOTE(review): unreachable after raise
-
def objectList(self, str, i, res):
    """Parse a comma-separated list of objects, appending each to *res*.
    Returns the position of the first non-comma delimiter, or a
    negative value on failure."""
    i = self.object(str, i, res)
    if i<0: return -1
    while 1:
        j = self.skipSpace(str, i)
        if j<0:
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "EOF found after object")
            return j #eof  # NOTE(review): unreachable after raise
        if str[j:j+1] != ",":
            return j  # Found something else!
        i = self.object(str, j+1, res)
        if i<0: return i
-
def checkDot(self, str, i):
    """Expect a statement terminator: consume ".", accept (but do not
    consume) "}" or "]", otherwise raise BadSyntax."""
    j = self.skipSpace(str, i)
    if j<0: return j #eof
    if str[j:j+1] == ".":
        return j+1  # skip
    if str[j:j+1] == "}":
        return j    # don't skip it
    if str[j:j+1] == "]":
        return j
    raise BadSyntax(self._thisDoc, self.lines,
            str, j, "expected '.' or '}' or ']' at end of statement")
    return i  # NOTE(review): unreachable after raise
-
-
def uri_ref2(self, str, i, res):
    """Generate uri from n3 representation.

    Note that the RDF convention of directly concatenating
    NS and local name is now used though I prefer inserting a '#'
    to make the namesapces look more like what XML folks expect.

    Accepts, in order: qname (pfx:local, with the magic "_" bnode
    prefix), ?variable, <uri>, or a bare word when keywords are set.
    """
    qn = []
    j = self.qname(str, i, qn)
    if j>=0:
        pfx, ln = qn[0]
        if pfx is None:
            assert 0, "not used?"
            ns = self._baseURI + ADDED_HASH
        else:
            try:
                ns = self._bindings[pfx]
            except KeyError:
                if pfx == "_":  # Magic prefix 2001/05/30, can be overridden
                    res.append(self.anonymousNode(ln))
                    return j
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "Prefix \"%s:\" not bound" % (pfx))
        symb = self._store.newSymbol(ns + ln)
        if symb in self._variables:
            res.append(self._variables[symb])
        else:
            res.append(symb)  # @@@ "#" CONVENTION
        # NOTE(review): `not ns.find("#")` is only true when "#" is at
        # index 0; `ns.find("#") < 0` ("no # at all") looks intended.
        if not ns.find("#"):progress(
            "Warning: no # on namespace %s," % ns)
        return j


    i = self.skipSpace(str, i)
    if i<0: return -1

    if str[i] == "?":
        v = []
        j = self.variable(str,i,v)
        if j>0:  #Forget varibles as a class, only in context.
            res.append(v[0])
            return j
        return -1

    elif str[i]=="<":
        i = i + 1
        st = i
        while i < len(str):
            if str[i] == ">":
                uref = str[st:i]  # the join should dealt with "":
                if self._baseURI:
                    uref = join(self._baseURI, uref)  # was: uripath.join
                else:
                    assert ":" in uref, \
                        "With no base URI, cannot deal with relative URIs"
                if str[i-1:i]=="#" and not uref[-1:]=="#":
                    uref = uref + "#"  # She meant it! Weirdness in urlparse?
                symb = self._store.newSymbol(uref)
                if symb in self._variables:
                    res.append(self._variables[symb])
                else:
                    res.append(symb)
                return i+1
            i = i + 1
        raise BadSyntax(self._thisDoc, self.lines, str, j,
                        "unterminated URI reference")

    elif self.keywordsSet:
        v = []
        j = self.bareWord(str,i,v)
        if j<0: return -1  #Forget varibles as a class, only in context.
        if v[0] in self.keywords:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                'Keyword "%s" not allowed here.' % v[0])
        res.append(self._store.newSymbol(self._bindings[""]+v[0]))
        return j
    else:
        return -1
-
def skipSpace(self, str, i):
    """Skip white space, newlines and comments.
    return -1 if EOF, else position of first non-ws character

    Uses the module-level `eol`, `ws` and `eof` compiled regexes and
    keeps self.lines / self.startOfLine up to date for error messages.
    """
    while 1:
        m = eol.match(str, i)
        if m == None: break
        self.lines = self.lines + 1
        i = m.end()  # Point to first character unmatched
        self.startOfLine = i
    m = ws.match(str, i)
    if m != None:
        i = m.end()
    m = eof.match(str, i)
    if m != None: return -1
    return i
-
def variable(self, str, i, res):
    """ ?abc -> variable(:abc)

    Universal variables are declared in the parent context when one
    exists, otherwise in the current (outermost) context.
    """

    j = self.skipSpace(str, i)
    if j<0: return -1

    if str[j:j+1] != "?": return -1
    j=j+1
    i = j
    if str[j] in "0123456789-":
        raise BadSyntax(self._thisDoc, self.lines, str, j,
                        "Varible name can't start with '%s'" % str[j])
        return -1  # NOTE(review): unreachable after raise
    while i <len(str) and str[i] not in _notNameChars:
        i = i+1
    if self._parentContext == None:
        varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
        if varURI not in self._variables:
            self._variables[varURI] = self._context.newUniversal(varURI
                            , why=self._reason2)
        res.append(self._variables[varURI])
        return i
    # @@ was:
    # raise BadSyntax(self._thisDoc, self.lines, str, j,
    #     "Can't use ?xxx syntax for variable in outermost level: %s"
    #     % str[j-1:i])
    varURI = self._store.newSymbol(self._baseURI + "#" +str[j:i])
    if varURI not in self._parentVariables:
        self._parentVariables[varURI] = self._parentContext.newUniversal(varURI
                        , why=self._reason2)
    res.append(self._parentVariables[varURI])
    return i
-
def bareWord(self, str, i, res):
    """ abc -> :abc

    Appends the bare word to *res* and returns the position after it,
    or -1 when the next token is not a word.
    """
    j = self.skipSpace(str, i)
    if j<0: return -1

    if str[j] in "0123456789-" or str[j] in _notNameChars: return -1
    i = j
    while i <len(str) and str[i] not in _notNameChars:
        i = i+1
    res.append(str[j:i])
    return i
-
def qname(self, str, i, res):
    """
    xyz:def -> ('xyz', 'def')
    If not in keywords and keywordsSet: def -> ('', 'def')
    :def -> ('', 'def')

    Appends one (prefix, localname) tuple to *res*; returns the new
    position or -1 when no qname is present.
    """

    i = self.skipSpace(str, i)
    if i<0: return -1

    c = str[i]
    if c in "0123456789-+": return -1
    if c not in _notNameChars:
        ln = c
        i = i + 1
        while i < len(str):
            c = str[i]
            if c not in _notNameChars:
                ln = ln + c
                i = i + 1
            else: break
    else:  # First character is non-alpha
        ln = ''  # Was: None - TBL (why? useful?)

    if i<len(str) and str[i] == ':':
        pfx = ln
        i = i + 1
        ln = ''
        while i < len(str):
            c = str[i]
            if c not in _notNameChars:
                ln = ln + c
                i = i + 1
            else: break

        res.append((pfx, ln))
        return i

    else:  # delimiter was not ":"
        if ln and self.keywordsSet and ln not in self.keywords:
            res.append(('', ln))
            return i
        return -1
-
def object(self, str, i, res):
    """Parse an object: any subject production, or a plain (undecorated)
    string literal."""
    j = self.subject(str, i, res)
    if j>= 0:
        return j
    else:
        j = self.skipSpace(str, i)
        if j<0: return -1
        else: i=j

        if str[i]=='"':
            if str[i:i+3] == '"""': delim = '"""'
            else: delim = '"'
            i = i + len(delim)

            j, s = self.strconst(str, i, delim)

            res.append(self._store.newLiteral(s))
            # NOTE(review): debug trace emitted for every string literal.
            progress("New string const ", s, j)
            return j
        else:
            return -1
-
def nodeOrLiteral(self, str, i, res):
    """Parse a node, or failing that a numeric or string literal
    (with optional @language tag or ^^datatype).  Python 2 only:
    integers are built with long().
    """
    j = self.node(str, i, res)
    startline = self.lines  # Remember where for error messages
    if j>= 0:
        return j
    else:
        j = self.skipSpace(str, i)
        if j<0: return -1
        else: i=j

        ch = str[i]
        if ch in "-+0987654321":
            m = number_syntax.match(str, i)
            if m == None:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "Bad number syntax")
            j = m.end()
            if m.group('exponent') != None:  # includes decimal exponent
                res.append(float(str[i:j]))
#                res.append(self._store.newLiteral(str[i:j],
#                    self._store.newSymbol(FLOAT_DATATYPE)))
            elif m.group('decimal') != None:
                res.append(Decimal(str[i:j]))
            else:
                res.append(long(str[i:j]))
#                res.append(self._store.newLiteral(str[i:j],
#                    self._store.newSymbol(INTEGER_DATATYPE)))
            return j

        if str[i]=='"':
            if str[i:i+3] == '"""': delim = '"""'
            else: delim = '"'
            i = i + len(delim)

            dt = None
            j, s = self.strconst(str, i, delim)
            lang = None
            if str[j:j+1] == "@":  # Language?
                m = langcode.match(str, j+1)
                if m == None:
                    raise BadSyntax(self._thisDoc, startline, str, i,
                        "Bad language code syntax on string literal, after @")
                i = m.end()
                lang = str[j+1:i]
                j = i
            if str[j:j+2] == "^^":
                res2 = []
                j = self.uri_ref2(str, j+2, res2)  # Read datatype URI
                dt = res2[0]
#                if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
                if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral":
                    try:
                        dom = XMLtoDOM('<rdf:envelope xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns">'
                                       + s
                                       + '</rdf:envelope>').firstChild
                    except:
                        raise ValueError('s="%s"' % s)
                    res.append(self._store.newXMLLiteral(dom))
                    return j
            res.append(self._store.newLiteral(s, dt, lang))
            return j
        else:
            return -1
-
def uriOf(self, sym):
    """Return the URI string for *sym*; tuples come from the old
    --pipe system.  NOTE: types.TupleType is Python 2 only."""
    if isinstance(sym, types.TupleType):
        return sym[1]  # old system for --pipe
    # return sym.uriref()  # cwm api
    return sym
-
-
def strconst(self, str, i, delim):
    """parse an N3 string constant delimited by delim.
    return index, val

    *delim* is '"' or '\"\"\"'; handles backslash escapes (\\u, \\U
    included), strips carriage returns, and tracks line numbers across
    embedded newlines in long strings.
    """


    j = i
    ustr = u""  # Empty unicode string
    startline = self.lines  # Remember where for error messages
    while j<len(str):
        if str[j] == '"':
            if delim == '"':  # done when delim is "
                i = j + 1
                return i, ustr
            if delim == '"""':  # done when delim is """ and ...
                if str[j:j+5] == '"""""':  # ... we have "" before
                    i = j + 5
                    ustr = ustr + '""'
                    return i, ustr
                if str[j:j+4] == '""""':  # ... we have " before
                    i = j + 4
                    ustr = ustr + '"'
                    return i, ustr
                if str[j:j+3] == '"""':  # ... current " is part of delim
                    i = j + 3
                    return i, ustr

                # we are inside of the string and current char is "
                j = j + 1
                ustr = ustr + '"'
                continue

        m = interesting.search(str, j)  # was str[j:].
        # Note for pos param to work, MUST be compiled ... re bug?
        assert m , "Quote expected in string at ^ in %s^%s" %(
            str[j-20:j], str[j:j+20])  # we at least have to find a quote

        i = m.start()
        try:
            ustr = ustr + str[j:i]
        except UnicodeError:
            err = ""
            for c in str[j:i]:
                err = err + (" %02x" % ord(c))
            streason = sys.exc_info()[1].__str__()
            raise BadSyntax(self._thisDoc, startline, str, j,
                "Unicode error appending characters %s to string, because\n\t%s"
                            % (err, streason))

#        print "@@@ i = ",i, " j=",j, "m.end=", m.end()

        ch = str[i]
        if ch == '"':
            j = i
            continue
        elif ch == "\r":  # Strip carriage returns
            j = i+1
            continue
        elif ch == "\n":
            if delim == '"':
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "newline found in string literal")
            self.lines = self.lines + 1
            ustr = ustr + ch
            j = i + 1
            self.startOfLine = j

        elif ch == "\\":
            j = i + 1
            ch = str[j:j+1]  # Will be empty if string ends
            if not ch:
                raise BadSyntax(self._thisDoc, startline, str, i,
                                "unterminated string literal (2)")
            k = 'abfrtvn\\"'.find(ch)
            if k >= 0:
                uch = '\a\b\f\r\t\v\n\\"'[k]
                ustr = ustr + uch
                j = j + 1
            elif ch == "u":
                j, ch = self.uEscape(str, j+1, startline)
                ustr = ustr + ch
            elif ch == "U":
                j, ch = self.UEscape(str, j+1, startline)
                ustr = ustr + ch
            else:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "bad escape")

    raise BadSyntax(self._thisDoc, self.lines, str, i,
                    "unterminated string literal")
-
-
def uEscape(self, str, i, startline):
    """Decode a 4-hex-digit \\uXXXX escape starting at *i*.
    Returns (new position, decoded character).  Python 2 only (unichr)."""
    j = i
    count = 0
    value = 0
    while count < 4:  # Get 4 more characters
        ch = str[j:j+1].lower()
        # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
        j = j + 1
        if ch == "":
            raise BadSyntax(self._thisDoc, startline, str, i,
                            "unterminated string literal(3)")
        k = "0123456789abcdef".find(ch)
        if k < 0:
            raise BadSyntax(self._thisDoc, startline, str, i,
                            "bad string literal hex escape")
        value = value * 16 + k
        count = count + 1
    uch = unichr(value)
    return j, uch
-
def UEscape(self, str, i, startline):
    """Decode an 8-hex-digit \\UXXXXXXXX escape starting at *i* by
    building a '\\U...' literal and running it through the
    unicode-escape codec.  Returns (new position, decoded character)."""
    stringType = type('')
    j = i
    count = 0
    value = '\\U'
    while count < 8:  # Get 8 more characters
        ch = str[j:j+1].lower()
        # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05
        j = j + 1
        if ch == "":
            raise BadSyntax(self._thisDoc, startline, str, i,
                            "unterminated string literal(3)")
        k = "0123456789abcdef".find(ch)
        if k < 0:
            raise BadSyntax(self._thisDoc, startline, str, i,
                            "bad string literal hex escape")
        value = value + ch
        count = count + 1

    uch = stringType(value).decode('unicode-escape')
    return j, uch
-
# Detect whether this Python 2 interpreter is a "wide" Unicode build:
# unichr(0x10000) raises ValueError on narrow (UTF-16 surrogate) builds.
wide_build = True
try:
    unichr(0x10000)
except ValueError:
    wide_build = False
-
-# If we are going to do operators then they should generate
-# [ is operator:plus of ( \1 \2 ) ]
-
-
class BadSyntax(SyntaxError):
    """Syntax error raised by the N3 parser, carrying the document URI,
    line count, source text, offset and a human-readable reason."""

    def __init__(self, uri, lines, str, i, why):
        self._str = str.encode('utf-8')  # Better go back to strings for errors
        self._i = i
        self._why = why
        self.lines = lines
        self._uri = uri

    def __str__(self):
        # Show up to 60 characters of context either side of the error,
        # with "..." ellipses when the text is truncated.
        str = self._str
        i = self._i
        st = 0
        if i>60:
            pre="..."
            st = i - 60
        else: pre=""
        if len(str)-i > 60: post="..."
        else: post=""

        return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
               % (self.lines +1, self._uri, self._why, pre,
                  str[st:i], str[i:i+60], post)
-
-
-
def stripCR(str):
    """Return *str* with every carriage-return character removed.

    :param str: input string (name kept for interface compatibility).
    :return: the same string minus all "\\r" characters.
    """
    # str.replace runs in C and is O(n); the old character-by-character
    # `res = res + ch` loop was quadratic.
    return str.replace("\r", "")
-
def dummyWrite(x):
    """No-op sink: accept *x* and discard it."""
    return None
-
-################################################################################
-
-
def toBool(s):
    """Map 'true'/'True'/'1' to True and 'false'/'False'/'0' to False.

    Raises ValueError for any other input.
    """
    if s in ('true', 'True', '1'):
        return True
    if s in ('false', 'False', '0'):
        return False
    raise ValueError(s)
-
-
-
-
-
class Formula(object):
    """A quoted N3 formula, backed by an rdflib QuotedGraph.

    Each instance gets a unique sequence number used for its bnode
    identifier; `counter` numbers blank nodes created inside it.
    """
    number = 0  # class-wide counter of formulae created so far

    def __init__(self, parent):
        self.counter = 0
        Formula.number += 1
        self.number = Formula.number
        self.existentials = {}
        self.universals = {}

        self.quotedgraph=QuotedGraph(store=parent.store, identifier=self.id())

    def __str__(self):
        return '_:Formula%s' % self.number

    def id(self):
        return BNode('_:Formula%s' % self.number)

    def newBlankNode(self, uri=None, why=None):
        # Fresh node per call when no uri; otherwise derive the label
        # from the uri's fragment.
        if uri is None:
            self.counter += 1
            b = BNode('f%sb%s' % (id(self), self.counter))
        else: b = BNode(uri.split('#').pop().replace('_', 'b'))
        return b

    def newUniversal(self, uri, why=None):
        return Variable(uri.split('#').pop())

    def declareExistential(self, x):
        self.existentials[x] = self.newBlankNode()

    def close(self):

        return self.quotedgraph
-
# Matches any high (non-ASCII) byte — kept for the alternative
# %XX-escaping implementation commented out below.
r_hibyte = re.compile(r'([\x80-\xff])')

def iri(uri):
    # Python 2 only: decodes a UTF-8 byte string to unicode.
    return uri.decode('utf-8')
    # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri))
-
class RDFSink(object):
    """Adapter presenting an rdflib Graph to SinkParser as a CWM-style
    sink: it creates terms (symbols, literals, blank nodes, lists) and
    receives (formula, predicate, subject, object) quads.

    NOTE(review): Python 2 only (unicode, long, dict.has_key).
    """

    def __init__(self, graph):
        self.rootFormula = None   # set by startDoc; quads in it go to `graph`
        self.counter = 0          # for generating fresh bnode labels
        self.graph=graph


    def newFormula(self):
        assert self.graph.store.formula_aware
        f = Formula(self.graph)
        return f

    def newSymbol(self, *args):
        uri = args[0].encode('utf-8')
        return URIRef(iri(uri))

    def newBlankNode(self, arg=None, **kargs):
        # arg may be a Formula (delegate), None (fresh node) or a tuple
        # whose first element carries a uri to derive the label from.
        if isinstance(arg, Formula):
            return arg.newBlankNode()
        elif arg is None:
            self.counter += 1
            b = BNode('n' + str(self.counter))
        else: b = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
        return b

    def newLiteral(self, s, dt, lang):
        if dt: return Literal(s, datatype=dt)
        else: return Literal(s, lang=lang)

    def newList(self, n, f):
        # Recursively build an rdf:List; the empty list is rdf:nil.
        if not n:
            return self.newSymbol(
                'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
            )

        a = self.newBlankNode(f)
        first = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
        )
        rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
        self.makeStatement((f, first, a, n[0]))
        self.makeStatement((f, rest, a, self.newList(n[1:], f)))
        return a

    def newSet(self, *args):
        return set(args)

    def setDefaultNamespace(self, *args):
        return ':'.join(repr(n) for n in args)

    def makeStatement(self, quadruple, why=None):
        """Normalise the terms of a quad and add the triple either to
        the main graph (root formula) or to the owning formula's
        quoted graph."""
        f, p, s, o = quadruple

        if hasattr(p, 'formula'):
            raise Exception("Formula used as predicate")

        s = self.normalise(f, s)
        p = self.normalise(f, p)
        o = self.normalise(f, o)


        if f == self.rootFormula:
            # print s, p, o, '.'
            self.graph.add((s, p, o))
        else:
            f.quotedgraph.add((s,p,o))


        #return str(quadruple)

    def normalise(self, f, n):
        """Convert parser-produced Python values (tuples, bools, ints,
        Decimals, floats, existential symbols) into rdflib terms."""
        if isinstance(n, tuple):
            return URIRef(unicode(n[1]))

        # if isinstance(n, list):
        #    rdflist, f = n
        #    name = self.newBlankNode()
        #    if f == self.rootFormula:
        #       sublist = name
        #       for i in xrange(0, len(rdflist) - 1):
        #          print sublist, 'first', rdflist[i]
        #          rest = self.newBlankNode()
        #          print sublist, 'rest', rest
        #          sublist = rest
        #       print sublist, 'first', rdflist[-1]
        #       print sublist, 'rest', 'nil'
        #    return name

        if isinstance(n, bool):
            s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
            return s

        if isinstance(n, int) or isinstance(n, long):
            s = Literal(unicode(n), datatype=INTEGER_DATATYPE)
            return s

        if isinstance(n, Decimal):
            value = str(n.normalize())
            if value == '-0':
                value = '0'
            s = Literal(value, datatype=DECIMAL_DATATYPE )
            return s

        if isinstance(n, float):
            s = Literal(str(n), datatype=DOUBLE_DATATYPE )
            return s

        if f.existentials.has_key(n):
            return f.existentials[n]

        # if isinstance(n, Var):
        #    if f.universals.has_key(n):
        #       return f.universals[n]
        #    f.universals[n] = f.newBlankNode()
        #    return f.universals[n]

        return n

    def intern(self, something):
        return something

    def bind(self, pfx, uri):
        pass # print pfx, ':', uri

    def startDoc(self, formula):
        self.rootFormula = formula

    def endDoc(self, formula):
        pass
-
-###################################################
-#
-# Utilities
-#
-
# Map of N3 backslash-escape letters to the characters they denote.
Escapes = {'a':  '\a',
           'b':  '\b',
           'f':  '\f',
           'r':  '\r',
           't':  '\t',
           'v':  '\v',
           'n':  '\n',
           '\\': '\\',
           '"':  '"'}

# Characters that must be escaped inside a string literal:
# forbidden1 is for long (""") strings, which may contain raw tabs and
# newlines; forbidden2 is for single-quoted strings, which may not.
# NOTE(review): ur'' raw-unicode literals are Python 2 only.
forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]')
forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]')
#"
def stringToN3(str, singleLine=0, flags=""):
    """Serialise *str* as an N3 string literal, choosing single- or
    triple-quote delimiters and escaping forbidden characters.
    Flag 'e' forces \\uXXXX escapes for non-ASCII characters.
    NOTE(review): uses the Python 2-only `<>` operator.
    """
    res = ''
    if (len(str) > 20 and
        str[-1] <> '"' and
        not singleLine and
        (str.find("\n") >=0
         or str.find('"') >=0)):
        delim= '"""'
        forbidden = forbidden1   # (allow tabs too now)
    else:
        delim = '"'
        forbidden = forbidden2

    i = 0

    while i < len(str):
        m = forbidden.search(str, i)
        if not m:
            break

        j = m.start()
        res = res + str[i:j]
        ch = m.group(0)
        if ch == '"' and delim == '"""' and str[j:j+3] != '"""':  #"
            res = res + ch
        else:
            k = '\a\b\f\r\t\v\n\\"'.find(ch)
            if k >= 0: res = res + "\\" + 'abfrtvn\\"'[k]
            else:
                if 'e' in flags:
#                    res = res + ('\\u%04x' % ord(ch))
                    res = res + ('\\u%04X' % ord(ch))
                    # http://www.w3.org/TR/rdf-testcases/#ntriples
                else:
                    res = res + ch
        i = j + 1

    # The following code fixes things for really high range Unicode
    newstr = ""
    for ch in res + str[i:]:
        if ord(ch)>65535:
            newstr = newstr + ('\\U%08X' % ord(ch))
            # http://www.w3.org/TR/rdf-testcases/#ntriples
        else:
            newstr = newstr + ch
    #

    return delim + newstr + delim
-
def backslashUify(ustr):
    """Use URL encoding to return an ASCII string corresponding
    to the given unicode

    Characters above U+FFFF become \\UXXXXXXXX, other non-ASCII
    become \\uXXXX; the result is passed through py3compat's b().
    """
#    progress("String is "+`ustr`)
#    s1=ustr.encode('utf-8')
    s = ""
    for ch in ustr:  # .encode('utf-8'):
        if ord(ch) > 65535:
            ch = "\\U%08X" % ord(ch)
        elif ord(ch) > 126:
            ch = "\\u%04X" % ord(ch)
        else:
            ch = "%c" % ord(ch)
        s = s + ch
    return b(s)
-
@py3compat.format_doctest_out
def hexify(ustr):
    """Use URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    """ #"
    # Characters outside the printable-ASCII range 33..126 are
    # percent-encoded; the result goes through py3compat's b().
#    progress("String is "+`ustr`)
#    s1=ustr.encode('utf-8')
    s = ""
    for ch in ustr:  # .encode('utf-8'):
        if ord(ch) > 126 or ord(ch) < 33 :
            ch = "%%%02X" % ord(ch)
        else:
            ch = "%c" % ord(ch)
        s = s + ch
    return b(s)
-
def dummy():
    """Dead code kept from CWM.

    NOTE(review): takes no arguments yet reads `str` — which here
    resolves to the *builtin* type, so calling this would fail; it
    also uses Python 2-only backtick repr.  Apparently an abandoned
    draft of stringToN3.
    """
    res = ""
    if len(str) > 20 and (str.find("\n") >=0
                          or str.find('"') >=0):
        delim= '"""'
        forbidden = "\\\"\a\b\f\r\v"    # (allow tabs too now)
    else:
        delim = '"'
        forbidden = "\\\"\a\b\f\r\v\t\n"
    for i in range(len(str)):
        ch = str[i]
        j = forbidden.find(ch)
        if ch == '"' and delim == '"""' \
                and i+1 < len(str) and str[i+1] != '"':
            j=-1  # Single quotes don't need escaping in long format
        if j>=0: ch = "\\" + '\\"abfrvtn'[j]
        elif ch not in "\n\t" and (ch < " " or ch > "}"):
            ch = "[[" + `ch` + "]]"  #[2:-1] # Use python
        res = res + ch
    return delim + res + delim
-
-
class N3Parser(Parser):
    """rdflib plugin entry point: parse an N3 input source into a
    formula-aware Graph via SinkParser and RDFSink."""

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        # we're currently being handed a Graph, not a ConjunctiveGraph
        assert graph.store.context_aware  # is this implied by formula_aware
        assert graph.store.formula_aware

        if encoding not in [None, "utf-8"]:
            raise Exception("N3 files are always utf-8 encoded, I was passed: %s"%encoding)

        conj_graph = ConjunctiveGraph(store=graph.store)
        conj_graph.default_context = graph  # TODO: CG __init__ should have a default_context arg
        # TODO: update N3Processor so that it can use conj_graph as the sink
        conj_graph.namespace_manager = graph.namespace_manager
        sink = RDFSink(conj_graph)

        baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "")
        p = SinkParser(sink, baseURI=baseURI)

        p.loadStream(source.getByteStream())

        # Re-export the prefixes the parser collected onto the graph.
        for prefix, namespace in p._bindings.items():
            conj_graph.bind(prefix, namespace)
-
-
-
-
def _test():
    """Run this module's doctests (currently only hexify's)."""
    import doctest
    doctest.testmod()
-
-
-# if __name__ == '__main__':
-# _test()
-
def main():
    """Command-line driver: parse the N3 file named in sys.argv[1]
    into a ConjunctiveGraph and print every resulting quad.
    NOTE(review): Python 2 only (print statement)."""
    g=ConjunctiveGraph()

    sink = RDFSink(g)
    base = 'file://' + os.path.join(os.getcwd(), sys.argv[1])

    p = SinkParser(sink, baseURI=base)
    # Bind the empty prefix to <base>#
    p._bindings[''] = p._baseURI + '#'
    p.startDoc()

    f = open(sys.argv[1], 'rb')
    bytes = f.read()
    f.close()

    p.feed(bytes)
    p.endDoc()
    for t in g.quads((None,None,None)):

        print t
-
if __name__ == '__main__':
    main()  # script entry point: parse sys.argv[1] and dump the quads
-
-#ends
-