diff options
Diffstat (limited to 'creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py')
-rw-r--r-- | creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py | 434 |
1 files changed, 0 insertions, 434 deletions
diff --git a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py b/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py deleted file mode 100644 index 0e6be08..0000000 --- a/creactistore/_templates/lib/rdflib_/plugins/parsers/rdfa/state.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Parser's execution context (a.k.a. state) object and handling. The state includes: - - - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances - - language, retrieved from C{@xml:lang} - - URI base, determined by <base> (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed. - - options, in the form of an L{Options<pyRdfa.Options>} instance - -The execution context object is also used to turn relative URI-s and CURIES into real URI references. - -@summary: RDFa core parser processing step -@requires: U{RDFLib package<http://rdflib_.net>} -@organization: U{World Wide Web Consortium<http://www.w3.org>} -@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} - -@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace -@var XHTML_URI: URI prefix of the XHTML vocabulary -@var RDFa_PROFILE: the official RDFa profile URI -@var RDFa_VERSION: the official version string of RDFa -@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected) -@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s. -@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.) -@var __bnodes: dictionary of blank node names to real blank node -@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}". -""" - -from rdflib_.namespace import Namespace, RDF, RDFS -from rdflib_.term import BNode, URIRef -from rdflib_.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA - -import re -import random -import urlparse - -__all__ = ['ExecutionContext'] - -RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab" -RDFa_VERSION = "XHTML+RDFa 1.0" -RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN" -RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd" - -usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"] - -####Predefined @rel/@rev/@property values -# predefined values for the @rel and @rev values. These are considered to be part of a specific -# namespace, defined by the RDFa document. -# At the moment, there are no predefined @property values, but the code is there in case -# some will be defined -XHTML_PREFIX = "xhv" -XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#" - -_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents', -'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev', -'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top'] - -_predefined_property = [] - -#### Managing blank nodes for CURIE-s -__bnodes = {} -__empty_bnode = BNode() -def _get_bnode_from_Curie(var): - """ - 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used - before, then the corresponding BNode is returned; otherwise a new BNode is created and - associated to that value. - @param var: CURIE BNode identifier - @return: BNode - """ - if len(var) == 0: - return __empty_bnode - if var in __bnodes: - return __bnodes[var] - else: - retval = BNode() - __bnodes[var] = retval - return retval - -#### Quote URI-s -import urllib -# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other -# special characters are converted to their %.. equivalents for namespace prefixes -_unquotedChars = ':/\?=#' -_warnChars = [' ', '\n', '\r', '\t'] -def _quote(uri, options): - """ - 'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters - may stay as they are (listed in L{_unquotedChars}. If one of the characters listed in L{_warnChars} - is also in the uri, an extra warning is also generated. - @param uri: URI - @param options: - @type options: L{Options<pyRdfa.Options>} - """ - suri = uri.strip() - for c in _warnChars: - if suri.find(c) != -1: - if options != None: - options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri) - break - return urllib.quote(suri, _unquotedChars) - - -#### Core Class definition -class ExecutionContext(object): - """State at a specific node, including the current set - of namespaces in the RDFLib sense, the - current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce - URI references for RDFLib. - - @ivar options: reference to the overall options - @type ivar: L{Options.Options} - @ivar base: the 'base' URI - @ivar defaultNS: default namespace - @ivar lang: language tag (possibly None) - @ivar ns: dictionary of namespaces - @type ns: dictionary, each value is an RDFLib Namespace object - - """ - def __init__(self, node, graph, inherited_state=None, base="", options=None): - """ - @param node: the current DOM Node - @param graph: the RDFLib Graph - @keyword inherited_state: the state as inherited - from upper layers. This inherited_state is mixed with the state information - retrieved from the current node. - @type inherited_state: L{State.ExecutionContext} - @keyword base: string denoting the base URI for the specific node. This overrides the possible - base inherited from the upper layers. The - current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is - necessary for SVG (and other possible XML dialects that accept C{@xml:base}) - @keyword options: invocation option - @type options: L{Options<pyRdfa.Options>} - """ - #----------------------------------------------------------------- - # settling the base - # note that, strictly speaking, it is not necessary to add the base to the - # context, because there is only one place to set it (<base> element of the <header>). - # It is done because it is prepared for a possible future change in direction of - # accepting xml:base on each element. - # At the moment, it is invoked with a 'None' at the top level of parsing, that is - # when the <base> element is looked for. - if inherited_state: - self.base = inherited_state.base - self.options = inherited_state.options - # for generic XML versions the xml:base attribute should be handled - if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): - self.base = node.getAttribute("xml:base") - else: - # this is the branch called from the very top - self.base = "" - for bases in node.getElementsByTagName("base"): - if bases.hasAttribute("href"): - self.base = bases.getAttribute("href") - continue - if self.base == "": - self.base = base - - # this is just to play safe. I believe this branch should actually not happen... - if options == None: - from pyRdfa import Options - self.options = Options() - else: - self.options = options - - # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2 - if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): - self.base = node.getAttribute("xml:base") - - self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options))) - - # check the the presense of the @profile and or @version attribute for the RDFa profile... - # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG) - if self.options.host_language != GENERIC_XML: - doctype = None - try: - # I am not 100% sure the HTML5 minidom implementation has this, so let us just be - # cautious here... - doctype = node.ownerDocument.doctype - except: - pass - if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ): - # next level: check the version - html = node.ownerDocument.documentElement - if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ): - # see if least the profile has been set - # Find the <head> element - head = None - for index in range(0, html.childNodes.length-1): - if html.childNodes.item(index).nodeName == "head": - head = html.childNodes.item(index) - break - if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ): - if self.options.host_language == HTML5_RDFA: - self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...") - else: - self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.") - - #----------------------------------------------------------------- - # Stripping the fragment ID from the base URI, as demanded by RFC 3986 - self.base = urlparse.urldefrag(self.base)[0] - - #----------------------------------------------------------------- - # Settling the language tags - # check first the lang or xml:lang attribute - # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang; - # I just want to be prepared here... - if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"): - self.lang = node.getAttribute("lang") - if len(self.lang) == 0 : self.lang = None - elif node.hasAttribute("xml:lang"): - self.lang = node.getAttribute("xml:lang") - if len(self.lang) == 0 : self.lang = None - elif inherited_state: - self.lang = inherited_state.lang - else: - self.lang = None - - #----------------------------------------------------------------- - # Handling namespaces - # First get the local xmlns declarations/namespaces stuff. - dict = {} - for i in range(0, node.attributes.length): - attr = node.attributes.item(i) - if attr.name.find('xmlns:') == 0 : - # yep, there is a namespace setting - key = attr.localName - if key != "" : # exclude the top level xmlns setting... - if key == "_": - if warning: self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" ) - elif key.find(':') != -1: - if warning: self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" ) - else : - # quote the URI, ie, convert special characters into %.. This is - # true, for example, for spaces - uri = _quote(attr.value, self.options) - # 1. create a new Namespace entry - ns = Namespace(uri) - # 2. 'bind' it in the current graph to - # get a nicer output - graph.bind(key, uri) - # 3. Add an entry to the dictionary - dict[key] = ns - - # See if anything has been collected at all. - # If not, the namespaces of the incoming state is - # taken over - self.ns = {} - if len(dict) == 0 and inherited_state: - self.ns = inherited_state.ns - else: - if inherited_state: - for k in inherited_state.ns : self.ns[k] = inherited_state.ns[k] - # copying the newly found namespace, possibly overwriting - # incoming values - for k in dict : self.ns[k] = dict[k] - else: - self.ns = dict - - # see if the xhtml core vocabulary has been set - self.xhtml_prefix = None - for key in self.ns.keys(): - if XHTML_URI == str(self.ns[key]): - self.xhtml_prefix = key - break - if self.xhtml_prefix == None: - if XHTML_PREFIX not in self.ns: - self.ns[XHTML_PREFIX] = Namespace(XHTML_URI) - self.xhtml_prefix = XHTML_PREFIX - else: - # the most disagreeable thing, the user has used - # the prefix for something else... - self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000)) - self.ns[self.xhtml_prefix] = Namespace(XHTML_URI) - graph.bind(self.xhtml_prefix, XHTML_URI) - - # extra tricks for unusual usages... - # if the 'rdf' prefix is not used, it is artificially added... - if "rdf" not in self.ns: - self.ns["rdf"] = RDF - if "rdfs" not in self.ns: - self.ns["rdfs"] = RDFS - - # Final touch: setting the default namespace... - if node.hasAttribute("xmlns"): - self.defaultNS = node.getAttribute("xmlns") - elif inherited_state and inherited_state.defaultNS != None: - self.defaultNS = inherited_state.defaultNS - else: - self.defaultNS = None - - def _get_predefined_rels(self, val, warning): - """Get the predefined URI value for the C{@rel/@rev} attribute. - @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_rel: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val) - return None - - def _get_predefined_properties(self, val, warning): - """Get the predefined value for the C{@property} attribute. - @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_property: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val) - return None - - def get_resource(self, val, rel=False, prop=False, warning=True): - """Get a resource for a CURIE. - The input argument is a CURIE; this is interpreted - via the current namespaces and the corresponding URI Reference is returned - @param val: string of the form "prefix:lname" - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: an RDFLib URIRef instance (or None) - """ - if val == "": - return None - elif val.find(":") != -1: - key = val.split(":", 1)[0] - lname = val.split(":", 1)[1] - if key == "_": - # A possible error: this method is invoked for property URI-s, which - # should not refer to a blank node. This case is checked and a possible - # error condition is handled - self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname) - return None - if key == "": - # This is the ":blabla" case - key = self.xhtml_prefix - else: - # if the resources correspond to a @rel or @rev or @property, then there - # may be one more possibility here, namely that it is one of the - # predefined values - if rel: - return self._get_predefined_rels(val, warning) - elif prop: - return self._get_predefined_properties(val, warning) - else: - self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val) - return None - - if key not in self.ns: - self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key) - return None - else: - if lname == "": - return URIRef(str(self.ns[key])) - else: - return self.ns[key][lname] - - def get_resources(self, val, rel=False, prop=False): - """Get a series of resources encoded in CURIE-s. - The input argument is a list of CURIE-s; these are interpreted - via the current namespaces and the corresponding URI References are returned. - @param val: strings of the form prefix':'lname, separated by space - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: a list of RDFLib URIRef instances (possibly empty) - """ - val.strip() - resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ] - return [ r for r in resources if r != None ] - - def get_URI_ref(self, val): - """Create a URI RDFLib resource for a URI. - The input argument is a URI. It is checked whether it is a local - reference with a '#' or not. If yes, a URIRef combined with the - stored base value is returned. In both cases a URIRef for a full URI is created - and returned - @param val: URI string - @return: an RDFLib URIRef instance - """ - if val == "": - return URIRef(self.base) - elif val[0] == '[' and val[-1] == ']': - self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val) - return None - else: - return URIRef(urlparse.urljoin(self.base, val)) - - def get_Curie_ref(self, val): - """Create a URI RDFLib resource for a CURIE. - The input argument is a CURIE. This means that it is: - - either of the form [a:b] where a:b should be resolved as an - 'unprotected' CURIE, or - - it is a traditional URI (relative or absolute) - - If the second case the URI value is also compared to 'usual' URI - protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}). - If there is no match, a warning is generated (indeed, a frequent - mistake in authoring RDFa is to forget the '[' and ']' characters to - "protect" CURIE-s.) - - @param val: CURIE string - @return: an RDFLib URIRef instance - """ - if len(val) == 0: - return URIRef(self.base) - elif val[0] == "[": - if val[-1] == "]": - curie = val[1:-1] - # A possible Blank node reference should be separated here: - if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":": - return _get_bnode_from_Curie(curie[2:]) - else: - return self.get_resource(val[1:-1]) - else: - # illegal CURIE... - self.options.comment_graph.add_error("Illegal CURIE: %s" % val) - return None - else: - # check the value, to see if an error may have been made... - # Usual protocol values in the URI - v = val.strip().lower() - protocol = urlparse.urlparse(val)[0] - if protocol != "" and protocol not in usual_protocols: - err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val - self.options.comment_graph.add_warning(err) - return self.get_URI_ref(val) - |