1 files changed, 168 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py
new file mode 100644
index 0000000..9553349
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/__init__.py
@@ -0,0 +1,168 @@
+"""
+From a Python file, expecting an RDF/XML pretty printed output::
+
+    import rdflib.graph as g
+    graph = g.Graph()
+    graph.parse('filename.html', format='rdfa')
+    print graph.serialize(format='pretty-xml')
+
+For details on RDFa, the reader should consult the `RDFa syntax document`__.
+
+This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman
+
+.. __: http://www.w3.org/TR/rdfa-syntax
+.. __: http://www.w3.org/2007/08/pyRdfa/
+
+"""
+
+
+import sys
+import urllib
+import xml.dom.minidom
+
+from rdflib.term import URIRef
+from rdflib.parser import Parser
+from rdflib.plugins.parsers.rdfa.state import ExecutionContext
+from rdflib.plugins.parsers.rdfa.parse import parse_one_node
+from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph,
+        DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA)
+
+from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform
+
+__all__ = ['RDFaParser']
+
+# These are part of the RDFa spec. _process_DOM() applies them to every
+# document, after the transformers passed in through the options.
+BUILT_IN_TRANSFORMERS = [head_about_transform]
+
+
+# Exception handling. Essentially, all the different exceptions are re-packaged
+# into a separate exception class, to allow for easier management at the user
+# level
+class RDFaError(Exception):
+    """Wrapper around the exceptions raised locally.
+
+    It adds no new functionality to the Exception class."""
+
+# Keys are (default namespace URI, root element name) pairs; a parsed XML
+# document matching one of them is switched to the associated input mode
+_HOST_LANG = {
+    ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML,
+    ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA,
+}
+
+
+class RDFaParser(Parser):
+    """rdflib parser plugin that feeds the RDFa found in a document to a sink."""
+
+    def parse(self, source, sink, warnings=False, space_preserve=True,
+              transformers=None, xhtml=True, lax=True, html5=False, encoding=None):
+        """Read ``source`` into a DOM tree and distill its RDFa into ``sink``."""
+        if transformers is None:
+            transformers = []
+        settings = Options(warnings, space_preserve, transformers, xhtml, lax)
+        base_uri, byte_stream = source.getPublicId(), source.getByteStream()
+        if not html5:
+            tree = _try_process_source(byte_stream, settings, encoding)
+        else:
+            tree = _process_html5_source(byte_stream, settings, encoding)
+        _process_DOM(tree, base_uri, sink, settings)
+
+
+def _process_DOM(dom, base, graph, options=None):
+    """
+    Core processing: run the transformers ("pre-processing") on the DOM tree,
+    initialize the state and do the "real" RDFa parsing, which happens in the
+    parser function ``parse_one_node()``. The result is put into ``graph``.
+
+    Params:
+    dom -- XML DOM document; its ``documentElement`` is the top level node
+    base -- URI for the default "base" value (usually the URI of the file to
+        be processed)
+    graph -- the graph that is filled with the extracted triples
+    options -- `Options` for the distiller. Despite the ``None`` default an
+        instance is required, since its attributes are read unconditionally.
+
+    If the comment graph of ``options`` holds a graph, its triples are also
+    copied into ``graph`` and the "dist" prefix is bound to ``DIST_NS``.
+    """
+    html = dom.documentElement
+    # Perform the built-in and external transformations on the HTML tree. This is,
+    # in simulated form, the hGRDDL approach of Ben Adida.
+    for trans in options.transformers + BUILT_IN_TRANSFORMERS:
+        trans(html, options)
+    # Collect the initial state. This takes care of things
+    # like base, top level namespace settings, etc.
+    # Ensure the proper initialization.
+    state = ExecutionContext(html, graph, base=base, options=options)
+    # The top level subject starts with the current document; this
+    # is used by the recursion
+    subject = URIRef(state.base)
+    # Parse the whole thing recursively and fill the graph.
+    parse_one_node(html, graph, subject, state, [])
+    # Add the content of the comment graph, if there is one, to the output.
+    if options.comment_graph.graph is not None:
+        graph.bind("dist", DIST_NS)
+        for t in options.comment_graph.graph:
+            graph.add(t)
+
+def _try_process_source(stream, options, encoding):
+    """
+    Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options
+    while figuring out input.
+
+    Returns a DOM tree. Raises RDFaError when the XML parser fails and either
+    the host language is generic XML or lax parsing is switched off.
+    """
+    try:
+        dom = xml.dom.minidom.parse(stream)
+        # Try to second-guess the input type
+        # This is _not_ really kosher, but the minidom is not really namespace aware...
+        # In practice the goal is to have the system recognize svg content automatically
+        # First see if there is a default namespace defined for the document:
+        top = dom.documentElement
+        if top.hasAttribute("xmlns"):
+            key = (top.getAttribute("xmlns"), top.nodeName)
+            if key in _HOST_LANG:
+                options.host_language = _HOST_LANG[key]
+        return dom
+    except Exception:
+        # XML parsing error in the input (KeyboardInterrupt/SystemExit pass through)
+        value = sys.exc_info()[1]
+        if options.host_language == GENERIC_XML or not options.lax:
+            raise RDFaError('Parsing error in input file: "%s"' % value)
+
+        # Lax mode: report the failure as a warning and retry with html5lib.
+        msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value
+        if options.warnings:
+            options.comment_graph.add_warning(msg)
+
+        # in Ivan's original code he reopened the stream if it was from urllib
+        if isinstance(stream, urllib.addinfourl):
+            stream = urllib.urlopen(stream.url)
+
+        return _process_html5_source(stream, options, encoding)
+
+
+def _process_html5_source(stream, options, encoding):
+    """Parses the stream with html5lib and returns the resulting DOM tree.
+
+    On success options.host_language is set to HTML5_RDFA. Raises RDFaError
+    if html5lib is not installed or fails to parse the input."""
+    try:
+        from html5lib import HTMLParser, treebuilders
+    except ImportError:
+        # No XHTML error is mentioned: html5=True callers come here directly.
+        raise RDFaError('HTML5 parser not available. Try installing html5lib '
+                        '<http://code.google.com/p/html5lib>')
+
+    parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
+    try:
+        dom = parser.parse(stream, encoding)
+    except Exception:
+        # Well, even the HTML5 parser could not do anything with this...
+        value = sys.exc_info()[1]
+        raise RDFaError('Parsing error in input file as HTML5: "%s"' % value)
+    # The host language has changed
+    options.host_language = HTML5_RDFA
+    return dom