Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py')
-rw-r--r--creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py180
1 files changed, 180 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py
new file mode 100644
index 0000000..2ab9b44
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/literal.py
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+"""
+Implementation of the Literal handling. Details of the algorithm are described on
+U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+@summary: RDFa Literal generation
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
+"""
+
+import re
+from rdflib.namespace import RDF
+from rdflib.term import Literal
+
+__all__ = ['generate_literal']
+
+XMLLiteral = RDF.XMLLiteral
+
+
+def __putBackEntities(str):
+ """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string.
+ Used by XML Literal
+ @param str: string to be converted
+ @return: string with entities
+ @rtype: string
+ """
+ return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+#### The real meat...
+def generate_literal(node, graph, subject, state):
+ """Generate the literal the C{@property}, taking into account datatype, etc.
+ Note: this method is called only if the C{@property} is indeed present, no need to check.
+
+ This method is an encoding of the algorithm documented
+ U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.
+
+ The method returns a value whether the literal is a 'normal' literal (regardless of its datatype)
+ or an XML Literal. The return value is True or False, respectively. This value is used to control whether
+ the parser should stop recursion. This also means that that if the literal is generated from @content,
+ the return value is False, regardless of the possible @datatype value.
+
+ @param node: DOM element node
+ @param graph: the (RDF) graph to add the properies to
+ @param subject: the RDFLib URIRef serving as a subject for the generated triples
+ @param state: the current state to be used for the CURIE-s
+ @type state: L{State.ExecutionContext}
+ @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value.
+ @rtype: Boolean
+ """
+ def _get_literal(Pnode):
+ """
+ Get (recursively) the full text from a DOM Node.
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ rc = ""
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + node.data
+ elif node.nodeType == node.ELEMENT_NODE:
+ rc = rc + _get_literal(node)
+
+ # The decision of the group in February 2008 is not to normalize the result by default.
+ # This is reflected in the default value of the option
+ if state.options.space_preserve:
+ return rc
+ else:
+ return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getLiteral
+
+ def _get_XML_literal(Pnode):
+ """
+ Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
+ via a C{node.toxml} call of the xml minidom implementation.)
+
+ @param Pnode: DOM Node
+ @return: string
+ """
+ def collectPrefixes(prefixes, node):
+ def addPf(prefx, string):
+ pf = string.split(':')[0]
+ if pf != string and pf not in prefx : prefx.append(pf)
+ # edn addPf
+
+ # first the local name of the node
+ addPf(prefixes, node.tagName)
+ # get all the attributes and children
+ for child in node.childNodes:
+ if child.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, child)
+ elif child.nodeType == node.ATTRIBUTE_NODE:
+ addPf(prefixes, node.child.name)
+ # end collectPrefixes
+
+ rc = ""
+ prefixes = []
+ for node in Pnode.childNodes:
+ if node.nodeType == node.ELEMENT_NODE:
+ collectPrefixes(prefixes, node)
+
+ for node in Pnode.childNodes:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + __putBackEntities(node.data)
+ elif node.nodeType == node.ELEMENT_NODE:
+ # Decorate the element with namespaces and lang values
+ for prefix in prefixes:
+ if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix):
+ node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix])
+ # Set the default namespace, if not done (and is available)
+ if not node.getAttribute("xmlns") and state.defaultNS != None:
+ node.setAttribute("xmlns", state.defaultNS)
+ # Get the lang, if necessary
+ if not node.getAttribute("xml:lang") and state.lang != None:
+ node.setAttribute("xml:lang", state.lang)
+ rc = rc + node.toxml()
+ return rc
+ # If XML Literals must be canonicalized for space, then this is the return line:
+ #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
+ # end getXMLLiteral
+
+ # Most of the times the literal is a 'normal' one, ie, not an XML Literal
+ retval = True
+
+ # Get the Property URI-s
+ props = state.get_resources(node.getAttribute("property"), prop=True)
+
+ # Get, if exists, the value of @datatype, and figure out the language
+ datatype = None
+ dtset = False
+ lang = state.lang
+ if node.hasAttribute("datatype"):
+ dtset = True
+ dt = node.getAttribute("datatype")
+ if dt != "":
+ datatype = state.get_resource(dt)
+ lang = None
+
+ # The simple case: separate @content attribute
+ if node.hasAttribute("content"):
+ val = node.getAttribute("content")
+ object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
+ # The value of datatype has been set, and the keyword paramaters take care of the rest
+ else:
+ # see if there *is* a datatype (even if it is empty!)
+ if dtset:
+ # yep. The Literal content is the pure text part of the current element:
+ # We have to check whether the specified datatype is, in fact, and
+ # explicit XML Literal
+ if datatype == XMLLiteral:
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ object = Literal(_get_literal(node), datatype=datatype, lang=lang)
+ else:
+ # no controlling @datatype. We have to see if there is markup in the contained
+ # element
+ if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]:
+ # yep, and XML Literal should be generated
+ object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
+ retval = False
+ else:
+ val = _get_literal(node)
+ # At this point, there might be entities in the string that are returned as real characters by the dom
+ # implementation. That should be turned back
+ object = Literal(_get_literal(node), lang=lang)
+
+ # NOTE: rdflib<2.5 didn't equal Literal with lang="", hence this check
+ # proably always passed?
+ # All tests pass with this check removed; going with that..
+ ## The object may be empty, for example in an ill-defined <meta> element...
+ if True:#object != "":
+ for prop in props:
+ graph.add((subject, prop, object))
+
+ return retval
+