Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/plugins/parsers/rdfa/parse.py
blob: d5b411fe199697f03cb985511485a5e15f7ae7f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# -*- coding: utf-8 -*-
"""
The core parsing function of RDFa. Some details are
put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state).

@summary: RDFa core parser processing step
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
"""

from rdflib.term import BNode, URIRef
from rdflib.namespace import RDF

from rdflib.plugins.parsers.rdfa.state import ExecutionContext
from rdflib.plugins.parsers.rdfa.literal import generate_literal
from rdflib.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF
from rdflib.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA

__all__ = ['parse_one_node']

def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    All results are produced as side effects: triples are added to C{graph}, and the
    function recurses into the element children of C{node}. Every exit path returns
    C{None}.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc)
    @type incoming_state: L{State.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: nothing; the function always returns None
    @rtype: None
    """
    def _get_resources_for_attr(attr):
        """Get a series of resources encoded via CURIE-s for an attribute on a specific node.
        @param attr: the name of the attribute
        @return: a list of RDFLib URIRef instances (empty if the attribute is absent)
        """
        if not node.hasAttribute(attr):
            return []
        # The two flags tell the state which kind of attribute the value comes from
        rel = attr in ("rel", "rev")
        prop = (attr == "property")
        return state.get_resources(node.getAttribute(attr), rel, prop)

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if (state.options.host_language == GENERIC_XML
            and node.nodeType == node.ELEMENT_NODE
            and handle_embeddedRDF(node, graph, state)):
        return

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"):
        # nop, there is nothing to do here, just go down the tree and return...
        # The parent's object and incomplete triples are handed down unchanged, so
        # that a deeper element can still pick them up.
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None

    if _has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.get_Curie_ref(node.getAttribute("about"))
        elif node.hasAttribute("src"):
            current_subject = state.get_URI_ref(node.getAttribute("src"))
        elif node.hasAttribute("typeof"):
            current_subject = BNode()

        # get_URI_ref may return None in case of an illegal Curie, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object

        # set the object resource; it stays None if neither attribute is present,
        # which is what turns the @rel/@rev triples below into incomplete ones
        if node.hasAttribute("resource"):
            current_object = state.get_Curie_ref(node.getAttribute("resource"))
        elif node.hasAttribute("href"):
            current_object = state.get_URI_ref(node.getAttribute("href"))
    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, except that their value might be different
        # in terms of CURIE-s and they also have their own priority, of course
        if node.hasAttribute("about"):
            current_subject = state.get_Curie_ref(node.getAttribute("about"))
        elif node.hasAttribute("src"):
            current_subject = state.get_URI_ref(node.getAttribute("src"))
        elif node.hasAttribute("resource"):
            current_subject = state.get_Curie_ref(node.getAttribute("resource"))
        elif node.hasAttribute("href"):
            current_subject = state.get_URI_ref(node.getAttribute("href"))
        elif node.hasAttribute("typeof"):
            current_subject = BNode()

        # get_URI_ref may return None in case of an illegal Curie, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object is to be transferred to
        # the children node
        current_object = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the new subject
    for defined_type in _get_resources_for_attr("typeof"):
        graph.add((current_subject, RDF.type, defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in _get_resources_for_attr("rel"):
        the_triple = (current_subject, prop, current_object)
        if current_object is not None:
            graph.add(the_triple)
        else:
            incomplete_triples.append(the_triple)
    for prop in _get_resources_for_attr("rev"):
        # @rev is the mirror image of @rel: the object resource is the subject of the triple
        the_triple = (current_object, prop, current_subject)
        if current_object is not None:
            graph.add(the_triple)
        else:
            incomplete_triples.append(the_triple)

    # ----------------------------------------------------------------------
    # Generation of the literal values. The new subject is the subject
    # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated,
    # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later
    # in the parsing process).
    if node.hasAttribute("property"):
        # Generate the literal. It has been put into a separate module to make it more manageable;
        # the value it returns is used as the recurse flag
        recurse = generate_literal(node, graph, current_subject, state)
    else:
        recurse = True

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    if recurse:
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed:
    # whichever end was left as None is filled in with the current subject
    for s, p, o in parent_incomplete_triples:
        if s is None:
            s = current_subject
        if o is None:
            o = current_subject
        graph.add((s, p, o))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return


def _has_one_of_attributes(node, *args):
    """
    Check whether at least one of the listed attributes is present on a (DOM) node.

    The attribute names are probed in the order given, and probing stops at the
    first one that is present. If no attribute names are passed the result is False.

    @param node: DOM element node (any object offering C{hasAttribute(name)})
    @param args: possible attribute names
    @return: True or False
    @rtype: Boolean
    """
    # any() short-circuits and needs no intermediate list
    return any(node.hasAttribute(attr) for attr in args)