Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/creactistore/_templates/lib/rdflib/compare.py
diff options
context:
space:
mode:
Diffstat (limited to 'creactistore/_templates/lib/rdflib/compare.py')
-rw-r--r--creactistore/_templates/lib/rdflib/compare.py257
1 files changed, 257 insertions, 0 deletions
diff --git a/creactistore/_templates/lib/rdflib/compare.py b/creactistore/_templates/lib/rdflib/compare.py
new file mode 100644
index 0000000..3567ecb
--- /dev/null
+++ b/creactistore/_templates/lib/rdflib/compare.py
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+"""
+A collection of utilities for canonicalizing and inspecting graphs.
+
+Among other things, they solve of the problem of deterministic bnode
+comparisons.
+
+Warning: the time to canonicalize bnodes may increase exponentially on larger
+graphs. Use with care!
+
+Example of comparing two graphs::
+
+ >>> g1 = Graph().parse(format='n3', data='''
+ ... @prefix : <http://example.org/ns#> .
+ ... <http://example.org> :rel
+ ... <http://example.org/same>,
+ ... [ :label "Same" ],
+ ... <http://example.org/a>,
+ ... [ :label "A" ] .
+ ... ''')
+ >>> g2 = Graph().parse(format='n3', data='''
+ ... @prefix : <http://example.org/ns#> .
+ ... <http://example.org> :rel
+ ... <http://example.org/same>,
+ ... [ :label "Same" ],
+ ... <http://example.org/b>,
+ ... [ :label "B" ] .
+ ... ''')
+ >>>
+ >>> iso1 = to_isomorphic(g1)
+ >>> iso2 = to_isomorphic(g2)
+
+These are not isomorphic::
+
+ >>> iso1 == iso2
+ False
+
+Diff the two graphs::
+
+ >>> in_both, in_first, in_second = graph_diff(iso1, iso2)
+
+Present in both::
+
+ >>> def dump_nt_sorted(g):
+ ... for l in sorted(g.serialize(format='nt').splitlines()):
+ ... if l: print(l.decode('ascii'))
+
+ >>> dump_nt_sorted(in_both)
+ <http://example.org> <http://example.org/ns#rel> <http://example.org/same> .
+ <http://example.org> <http://example.org/ns#rel> _:cbcaabaaba17fecbc304a64f8edee4335e .
+ _:cbcaabaaba17fecbc304a64f8edee4335e <http://example.org/ns#label> "Same" .
+
+Only in first::
+
+ >>> dump_nt_sorted(in_first)
+ <http://example.org> <http://example.org/ns#rel> <http://example.org/a> .
+ <http://example.org> <http://example.org/ns#rel> _:cb124e4c6da0579f810c0ffe4eff485bd9 .
+ _:cb124e4c6da0579f810c0ffe4eff485bd9 <http://example.org/ns#label> "A" .
+
+Only in second::
+
+ >>> dump_nt_sorted(in_second)
+ <http://example.org> <http://example.org/ns#rel> <http://example.org/b> .
+ <http://example.org> <http://example.org/ns#rel> _:cb558f30e21ddfc05ca53108348338ade8 .
+ _:cb558f30e21ddfc05ca53108348338ade8 <http://example.org/ns#label> "B" .
+"""
+
+# TODO:
+# - Doesn't handle quads.
+# - Add warning and/or safety mechanism before working on large graphs?
+# - use this in existing Graph.isomorphic?
+
+__all__ = ['IsomorphicGraph', 'to_isomorphic', 'isomorphic', 'to_canonical_graph', 'graph_diff', 'similar']
+
+from rdflib.graph import Graph, ConjunctiveGraph, ReadOnlyGraphAggregate
+from rdflib.term import BNode
+try:
+ import hashlib
+ md = hashlib.md5()
+except ImportError:
+ # for Python << 2.5
+ import md5
+ md = md5.new()
+
+class IsomorphicGraph(ConjunctiveGraph):
+ """
+ Ported from <http://www.w3.org/2001/sw/DataAccess/proto-tests/tools/rdfdiff.py>
+ (Sean B Palmer's RDF Graph Isomorphism Tester).
+ """
+
+ def __init__(self, **kwargs):
+ super(IsomorphicGraph, self).__init__(**kwargs)
+
+ def __eq__(self, other):
+ """Graph isomorphism testing."""
+ if not isinstance(other, IsomorphicGraph):
+ return False
+ elif len(self) != len(other):
+ return False
+ elif list(self) == list(other):
+ return True # TODO: really generally cheaper?
+ return self.internal_hash() == other.internal_hash()
+
+ def __ne__(self, other):
+ """Negative graph isomorphism testing."""
+ return not self.__eq__(other)
+
+ def internal_hash(self):
+ """
+ This is defined instead of __hash__ to avoid a circular recursion
+ scenario with the Memory store for rdflib which requires a hash lookup
+ in order to return a generator of triples.
+ """
+ return _TripleCanonicalizer(self).to_hash()
+
+
+class _TripleCanonicalizer(object):
+
+ def __init__(self, graph, hashfunc=hash):
+ self.graph = graph
+ self.hashfunc = hashfunc
+
+ def to_hash(self):
+ return self.hashfunc(tuple(sorted(
+ map(self.hashfunc, self.canonical_triples()) )))
+
+ def canonical_triples(self):
+ for triple in self.graph:
+ yield tuple(self._canonicalize_bnodes(triple))
+
+ def _canonicalize_bnodes(self, triple):
+ for term in triple:
+ if isinstance(term, BNode):
+ yield BNode(value="cb%s"%self._canonicalize(term))
+ else:
+ yield term
+
+ def _canonicalize(self, term, done=False):
+ return self.hashfunc(tuple(sorted(self._vhashtriples(term, done),
+ key=_hetero_tuple_key)))
+
+ def _vhashtriples(self, term, done):
+ for triple in self.graph:
+ if term in triple:
+ yield tuple(self._vhashtriple(triple, term, done))
+
+ def _vhashtriple(self, triple, target_term, done):
+ for i, term in enumerate(triple):
+ if not isinstance(term, BNode):
+ yield term
+ elif done or (term == target_term):
+ yield i
+ else:
+ yield self._canonicalize(term, done=True)
+
+def _hetero_tuple_key(x):
+ "Sort like Python 2 - by name of type, then by value. Expects tuples."
+ return tuple((type(a).__name__, a) for a in x)
+
+
+def to_isomorphic(graph):
+ if isinstance(graph, IsomorphicGraph):
+ return graph
+ return IsomorphicGraph(store=graph.store)
+
+
+def isomorphic(graph1, graph2):
+ """
+ Compare graph for equality. Uses an algorithm to compute unique hashes
+ which takes bnodes into account.
+
+ Examples::
+
+ >>> g1 = Graph().parse(format='n3', data='''
+ ... @prefix : <http://example.org/ns#> .
+ ... <http://example.org> :rel <http://example.org/a> .
+ ... <http://example.org> :rel <http://example.org/b> .
+ ... <http://example.org> :rel [ :label "A bnode." ] .
+ ... ''')
+ >>> g2 = Graph().parse(format='n3', data='''
+ ... @prefix ns: <http://example.org/ns#> .
+ ... <http://example.org> ns:rel [ ns:label "A bnode." ] .
+ ... <http://example.org> ns:rel <http://example.org/b>,
+ ... <http://example.org/a> .
+ ... ''')
+ >>> isomorphic(g1, g2)
+ True
+
+ >>> g3 = Graph().parse(format='n3', data='''
+ ... @prefix : <http://example.org/ns#> .
+ ... <http://example.org> :rel <http://example.org/a> .
+ ... <http://example.org> :rel <http://example.org/b> .
+ ... <http://example.org> :rel <http://example.org/c> .
+ ... ''')
+ >>> isomorphic(g1, g3)
+ False
+ """
+ return _TripleCanonicalizer(graph1).to_hash() == _TripleCanonicalizer(graph2).to_hash()
+
+
+def to_canonical_graph(g1):
+ """
+ Creates a canonical, read-only graph where all bnode id:s are based on
+ deterministical MD5 checksums, correlated with the graph contents.
+ """
+ graph = Graph()
+ graph += _TripleCanonicalizer(g1, _md5_hash).canonical_triples()
+ return ReadOnlyGraphAggregate([graph])
+
+
+def graph_diff(g1, g2):
+ """
+ Returns three sets of triples: "in both", "in first" and "in second".
+ """
+ # bnodes have deterministic values in canonical graphs:
+ cg1 = to_canonical_graph(g1)
+ cg2 = to_canonical_graph(g2)
+ in_both = cg1*cg2
+ in_first = cg1-cg2
+ in_second = cg2-cg1
+ return (in_both, in_first, in_second)
+
+
+def _md5_hash(t):
+ h = md
+ for i in t:
+ if isinstance(i, tuple):
+ h.update(_md5_hash(i).encode('ascii'))
+ else:
+ h.update(unicode(i).encode("utf8"))
+ return h.hexdigest()
+
+
+_MOCK_BNODE = BNode()
+
+def similar(g1, g2):
+ """
+ Checks if the two graphs are "similar", by comparing sorted triples where
+ all bnodes have been replaced by a singular mock bnode (the
+ ``_MOCK_BNODE``).
+
+ This is a much cheaper, but less reliable, alternative to the comparison
+ algorithm in ``isomorphic``.
+ """
+ return all(t1 == t2 for (t1, t2) in _squashed_graphs_triples(g1, g2))
+
+def _squashed_graphs_triples(g1, g2):
+ for (t1, t2) in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))):
+ yield t1, t2
+
+def _squash_graph(graph):
+ return (_squash_bnodes(triple) for triple in graph)
+
+def _squash_bnodes(triple):
+ return tuple((isinstance(t, BNode) and _MOCK_BNODE) or t for t in triple)
+
+