# creactistore/_templates/lib/rdflib_/plugins/serializers/rdfxml.py
#
# Added in commit 2a3bc1075a556b77978c1680b2b24c847dbc2d2a
# (florent, Mon, 06 Feb 2012 03:16:21 +0000):
#   "creatistore template review: not a webapp by default, dependencies
#    update, real simple semanticxo sample"
#
# NOTE(review): this module was recovered from a flattened patch view in
# which text that looked like markup had been stripped out of the string
# literals. The XML declaration, the rdf:RDF start/end tags and the closing
# element tags below were restored; confirm them against a pristine copy.
from __future__ import generators

from rdflib_.plugins.serializers.xmlwriter import XMLWriter

from rdflib_.namespace import Namespace, RDF, RDFS, split_uri

from rdflib_.term import URIRef, Literal, BNode
from rdflib_.util import first, more_than
from rdflib_.collection import Collection
from rdflib_.serializer import Serializer

from rdflib_.exceptions import Error

from rdflib_.py3compat import b

from xml.sax.saxutils import quoteattr, escape

__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer']


class XMLSerializer(Serializer):
    """Write the store as flat RDF/XML, one ``rdf:Description`` per subject."""

    def __init__(self, store):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self):
        """Yield ``(prefix, namespace)`` for each namespace used by a predicate.

        The ``rdf`` prefix is always present in the result; if the namespace
        manager already maps ``rdf`` it must map to the RDF syntax namespace.
        """
        store = self.store
        nm = store.namespace_manager
        bindings = {}
        for predicate in set(store.predicates()):
            prefix, namespace, name = nm.compute_qname(predicate)
            bindings[prefix] = URIRef(namespace)
        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        if "rdf" in bindings:
            assert bindings["rdf"] == RDFNS
        else:
            bindings["rdf"] = RDFNS
        # .items() works on both Python 2 and Python 3 (iteritems() does not).
        for prefix, namespace in bindings.items():
            yield prefix, namespace

    def serialize(self, stream, base=None, encoding=None, **args):
        """Serialize every subject of the store to *stream*.

        :param stream: object with a ``write`` method accepting encoded bytes.
        :param base: stored on ``self.base`` before any URI is relativized.
        :param encoding: ignored; ``self.encoding`` is always used.
        :param args: ``xml_base``, when given, is written as ``xml:base``.
        """
        self.base = base
        self.__stream = stream
        self.__serialized = {}
        encoding = self.encoding
        self.write = write = lambda uni: stream.write(
            uni.encode(encoding, 'replace'))

        try:
            # startDocument
            write('<?xml version="1.0" encoding="%s"?>\n' % self.encoding)

            # startRDF
            # NOTE(review): the start tag and its namespace declarations are
            # a reconstruction -- verify against the pristine file.
            write('<rdf:RDF\n')
            if "xml_base" in args:
                write('   xml:base=%s\n' % quoteattr(args["xml_base"]))
            for prefix, namespace in sorted(self.__bindings()):
                if prefix:
                    write('   xmlns:%s=%s\n' % (prefix, quoteattr(namespace)))
                else:
                    write('   xmlns=%s\n' % quoteattr(namespace))
            write('>\n')

            # write out triples by subject
            for subject in self.store.subjects():
                self.subject(subject, 1)

            # endRDF
            write("</rdf:RDF>\n")
        finally:
            # Drop the per-run state so the memory can be garbage collected,
            # even when writing fails part-way through.
            del self.__serialized

    def subject(self, subject, depth=1):
        """Write one ``rdf:Description`` for *subject* unless already written."""
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1
        if not isinstance(subject, (BNode, URIRef)):
            return

        write = self.write
        indent = " " * depth
        element_name = "rdf:Description"
        if isinstance(subject, BNode):
            write('%s<%s rdf:nodeID="%s"' % (indent, element_name, subject))
        else:
            uri = quoteattr(self.relativize(subject))
            write("%s<%s rdf:about=%s" % (indent, element_name, uri))

        if (subject, None, None) in self.store:
            write(">\n")
            for predicate, object in self.store.predicate_objects(subject):
                self.predicate(predicate, object, depth + 1)
            write("%s</%s>\n" % (indent, element_name))
        else:
            write("/>\n")

    def predicate(self, predicate, object, depth=1):
        """Write a single property element for ``(predicate, object)``."""
        write = self.write
        indent = " " * depth
        qname = self.store.namespace_manager.qname(predicate)
        if isinstance(object, Literal):
            attributes = ""
            # quoteattr() escapes and quotes the value, so a language tag or
            # datatype URI containing '&', '<' or '"' cannot break the XML.
            if object.language:
                attributes += ' xml:lang=%s' % quoteattr(object.language)

            if object.datatype:
                attributes += ' rdf:datatype=%s' % quoteattr(object.datatype)

            write("%s<%s%s>%s</%s>\n" %
                  (indent, qname, attributes, escape(object), qname))
        elif isinstance(object, BNode):
            write('%s<%s rdf:nodeID="%s"/>\n' % (indent, qname, object))
        else:
            write("%s<%s rdf:resource=%s/>\n" %
                  (indent, qname, quoteattr(self.relativize(object))))


XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')


# TODO: (left empty in the original source)
def fix(val):
    "strip off _: from nodeIDs... as they are not valid NCNames"
    if val.startswith("_:"):
        return val[2:]
    else:
        return val


class PrettyXMLSerializer(Serializer):
    """Write the store as nested RDF/XML using typed node elements."""

    def __init__(self, store, max_depth=3):
        super(PrettyXMLSerializer, self).__init__(store)
        self.forceRDFAbout = set()
        # Used by serialize() when no "max_depth" keyword is passed to it.
        self.__default_max_depth = max_depth

    def serialize(self, stream, base=None, encoding=None, **args):
        """Serialize the store to *stream*.

        :param stream: output stream handed to :class:`XMLWriter`.
        :param base: stored on ``self.base`` before any URI is relativized.
        :param encoding: passed through to :class:`XMLWriter`.
        :param args: ``max_depth`` (must be > 0) limits how deeply objects
            are nested; ``xml_base`` is written as the ``xml:base`` attribute.
        """
        self.__serialized = {}
        store = self.store
        self.base = base
        self.max_depth = args.get("max_depth", self.__default_max_depth)
        assert self.max_depth > 0, "max_depth must be greater than 0"

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)

        namespaces = {}
        possible = set(store.predicates()).union(store.objects(None, RDF.type))
        for predicate in possible:
            prefix, namespace, local = nm.compute_qname(predicate)
            namespaces[prefix] = namespace
        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        writer.push(RDF.RDF)
        if "xml_base" in args:
            writer.attribute(XMLBASE, args["xml_base"])
        writer.namespaces(namespaces.items())

        # Write out subjects that can not be inline
        for subject in store.subjects():
            if (None, None, subject) in store:
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()
        for subject in store.subjects():
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)
        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # The original passed the stale loop variable `subject` here,
                # so unreached blank nodes were never written.
                self.subject(bnode, 1)
        writer.pop(RDF.RDF)
        stream.write(b("\n"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None

    def subject(self, subject, depth=1):
        """Write the node element for *subject* and, recursively, its properties.

        Subjects queued in ``forceRDFAbout`` are written as an empty
        ``rdf:Description`` with ``rdf:about``; subjects already serialized
        are skipped.
        """
        store = self.store
        writer = self.writer
        if subject in self.forceRDFAbout:
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)
        elif subject not in self.__serialized:
            self.__serialized[subject] = 1
            rdf_type = first(store.objects(subject, RDF.type))
            try:
                # Only use the type as the element name if it has a qname.
                self.nm.qname(rdf_type)
            except Exception:
                rdf_type = None
            element = rdf_type or RDF.Description
            writer.push(element)
            if isinstance(subject, BNode):
                # The original guarded this with a reference-count check that
                # was disabled (it always returned True), so the nodeID is
                # always written.
                writer.attribute(RDF.nodeID, fix(subject))
            else:
                writer.attribute(RDF.about, self.relativize(subject))
            if (subject, None, None) in store:
                for predicate, object in store.predicate_objects(subject):
                    # The rdf:type used as the element name is not repeated.
                    if not (predicate == RDF.type and object == rdf_type):
                        self.predicate(predicate, object, depth + 1)
            writer.pop(element)

    def predicate(self, predicate, object, depth=1):
        """Write one property element, nesting the object where possible."""
        writer = self.writer
        store = self.store
        writer.push(predicate)
        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)
            if object.datatype:
                writer.attribute(RDF.datatype, object.datatype)
            writer.text(object)
        elif object in self.__serialized or not (object, None, None) in store:
            # Already written, or has no properties of its own: reference it.
            if isinstance(object, BNode):
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))
        else:
            if first(store.objects(object, RDF.first)):
                # may not have type RDF.List
                self.__serialized[object] = 1
                # TODO: warn that any assertions on object other than
                # RDF.first and RDF.rest are ignored... including RDF.List
                writer.attribute(RDF.parseType, "Collection")
                col = Collection(store, object)
                for item in col:
                    if isinstance(item, URIRef):
                        self.forceRDFAbout.add(item)
                    self.subject(item)
                    if not isinstance(item, URIRef):
                        self.__serialized[item] = 1
            else:
                if first(store.triples_choices((object,
                                                RDF.type,
                                                [OWL_NS.Class, RDFS.Class]))) \
                        and isinstance(object, URIRef):
                    writer.attribute(RDF.resource, self.relativize(object))
                elif depth <= self.max_depth:
                    self.subject(object, depth + 1)
                elif isinstance(object, BNode):
                    if object not in self.__serialized and \
                            (object, None, None) in store and \
                            len(list(store.subjects(object=object))) == 1:
                        # inline blank nodes if they haven't been serialized
                        # yet and are only referenced once (regardless of
                        # depth)
                        self.subject(object, depth + 1)
                    else:
                        writer.attribute(RDF.nodeID, fix(object))
                else:
                    writer.attribute(RDF.resource, self.relativize(object))
        writer.pop(predicate)