1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
"""
N-Triples RDF graph serializer for RDFLib.
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
format.
"""
from rdflib.serializer import Serializer
from rdflib.py3compat import b
import warnings
__all__ = ['NTSerializer']
class NTSerializer(Serializer):
    """
    Serializer that writes the triples of a graph store in N-Triples format.
    """

    def serialize(self, stream, base=None, encoding=None, **args):
        """
        Write every triple found in ``self.store`` to ``stream`` as bytes.

        ``base`` and ``encoding`` are accepted but not honoured: a warning is
        issued when either one is supplied, and ``self.encoding`` is always
        the codec used for the output.  One extra newline is written after
        the last triple.
        """
        if base is not None:
            warnings.warn("NTSerializer does not support base.")
        if encoding is not None:
            warnings.warn("NTSerializer does not use custom encoding.")

        # The serializer's own encoding is used whatever the caller passed.
        output_encoding = self.encoding
        for statement in self.store:
            line = _nt_row(statement)
            # Characters the codec cannot represent are substituted rather
            # than raising ("replace" error handler).
            stream.write(line.encode(output_encoding, "replace"))
        stream.write(b("\n"))
def _nt_row(triple):
    """Return the newline-terminated N-Triples line for ``triple``.

    The first two terms are rendered with their ``n3()`` method unchanged;
    the ``n3()`` text of the third term is additionally passed through
    ``_xmlcharref_encode`` before being placed on the line.
    """
    subject_text = triple[0].n3()
    predicate_text = triple[1].n3()
    object_text = _xmlcharref_encode(triple[2].n3())
    return u"%s %s %s .\n" % (subject_text, predicate_text, object_text)
# from <http://code.activestate.com/recipes/303668/>
def _xmlcharref_encode(unicode_data, encoding="ascii"):
"""Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
chars = []
# nothing to do about xmlchars, but replace newlines with escapes:
unicode_data=unicode_data.replace("\n","\\n")
if unicode_data.startswith('"""'):
# Updated with Bernhard Schandl's patch...
# unicode_data = unicode_data.replace('"""', '"') # original
last_triplequote_pos = unicode_data.rfind('"""')
payload = unicode_data[3:last_triplequote_pos]
trail = unicode_data[last_triplequote_pos+3:]
# fix three-quotes encoding
payload = payload.replace('\\"""', '"""')
# corner case: if string ends with " it is already encoded.
# so we need to de-escape it before it will be re-escaped in the next step.
if payload.endswith('\\"'):
payload = payload.replace('\\"', '"')
# escape quotes in payload
payload = payload.replace('"', '\\"')
# reconstruct result using single quotes
unicode_data = '"%s"%s' % (payload, trail)
# Step through the unicode_data string one character at a time in
# order to catch unencodable characters:
for char in unicode_data:
try:
char.encode(encoding, 'strict')
except UnicodeError:
if ord(char) <= 0xFFFF:
chars.append('\\u%04X' % ord(char))
else:
chars.append('\\U%08X' % ord(char))
else:
chars.append(char)
return ''.join(chars)
|