diff options
Diffstat (limited to 'creactistore/_templates/lib/rdflib/compare.py')
-rw-r--r-- | creactistore/_templates/lib/rdflib/compare.py | 257 |
1 files changed, 0 insertions, 257 deletions
diff --git a/creactistore/_templates/lib/rdflib/compare.py b/creactistore/_templates/lib/rdflib/compare.py deleted file mode 100644 index 3567ecb..0000000 --- a/creactistore/_templates/lib/rdflib/compare.py +++ /dev/null @@ -1,257 +0,0 @@ -# -*- coding: utf-8 -*- -""" -A collection of utilities for canonicalizing and inspecting graphs. - -Among other things, they solve of the problem of deterministic bnode -comparisons. - -Warning: the time to canonicalize bnodes may increase exponentially on larger -graphs. Use with care! - -Example of comparing two graphs:: - - >>> g1 = Graph().parse(format='n3', data=''' - ... @prefix : <http://example.org/ns#> . - ... <http://example.org> :rel - ... <http://example.org/same>, - ... [ :label "Same" ], - ... <http://example.org/a>, - ... [ :label "A" ] . - ... ''') - >>> g2 = Graph().parse(format='n3', data=''' - ... @prefix : <http://example.org/ns#> . - ... <http://example.org> :rel - ... <http://example.org/same>, - ... [ :label "Same" ], - ... <http://example.org/b>, - ... [ :label "B" ] . - ... ''') - >>> - >>> iso1 = to_isomorphic(g1) - >>> iso2 = to_isomorphic(g2) - -These are not isomorphic:: - - >>> iso1 == iso2 - False - -Diff the two graphs:: - - >>> in_both, in_first, in_second = graph_diff(iso1, iso2) - -Present in both:: - - >>> def dump_nt_sorted(g): - ... for l in sorted(g.serialize(format='nt').splitlines()): - ... if l: print(l.decode('ascii')) - - >>> dump_nt_sorted(in_both) - <http://example.org> <http://example.org/ns#rel> <http://example.org/same> . - <http://example.org> <http://example.org/ns#rel> _:cbcaabaaba17fecbc304a64f8edee4335e . - _:cbcaabaaba17fecbc304a64f8edee4335e <http://example.org/ns#label> "Same" . - -Only in first:: - - >>> dump_nt_sorted(in_first) - <http://example.org> <http://example.org/ns#rel> <http://example.org/a> . - <http://example.org> <http://example.org/ns#rel> _:cb124e4c6da0579f810c0ffe4eff485bd9 . - _:cb124e4c6da0579f810c0ffe4eff485bd9 <http://example.org/ns#label> "A" . - -Only in second:: - - >>> dump_nt_sorted(in_second) - <http://example.org> <http://example.org/ns#rel> <http://example.org/b> . - <http://example.org> <http://example.org/ns#rel> _:cb558f30e21ddfc05ca53108348338ade8 . - _:cb558f30e21ddfc05ca53108348338ade8 <http://example.org/ns#label> "B" . -""" - -# TODO: -# - Doesn't handle quads. -# - Add warning and/or safety mechanism before working on large graphs? -# - use this in existing Graph.isomorphic? - -__all__ = ['IsomorphicGraph', 'to_isomorphic', 'isomorphic', 'to_canonical_graph', 'graph_diff', 'similar'] - -from rdflib.graph import Graph, ConjunctiveGraph, ReadOnlyGraphAggregate -from rdflib.term import BNode -try: - import hashlib - md = hashlib.md5() -except ImportError: - # for Python << 2.5 - import md5 - md = md5.new() - -class IsomorphicGraph(ConjunctiveGraph): - """ - Ported from <http://www.w3.org/2001/sw/DataAccess/proto-tests/tools/rdfdiff.py> - (Sean B Palmer's RDF Graph Isomorphism Tester). - """ - - def __init__(self, **kwargs): - super(IsomorphicGraph, self).__init__(**kwargs) - - def __eq__(self, other): - """Graph isomorphism testing.""" - if not isinstance(other, IsomorphicGraph): - return False - elif len(self) != len(other): - return False - elif list(self) == list(other): - return True # TODO: really generally cheaper? - return self.internal_hash() == other.internal_hash() - - def __ne__(self, other): - """Negative graph isomorphism testing.""" - return not self.__eq__(other) - - def internal_hash(self): - """ - This is defined instead of __hash__ to avoid a circular recursion - scenario with the Memory store for rdflib which requires a hash lookup - in order to return a generator of triples. - """ - return _TripleCanonicalizer(self).to_hash() - - -class _TripleCanonicalizer(object): - - def __init__(self, graph, hashfunc=hash): - self.graph = graph - self.hashfunc = hashfunc - - def to_hash(self): - return self.hashfunc(tuple(sorted( - map(self.hashfunc, self.canonical_triples()) ))) - - def canonical_triples(self): - for triple in self.graph: - yield tuple(self._canonicalize_bnodes(triple)) - - def _canonicalize_bnodes(self, triple): - for term in triple: - if isinstance(term, BNode): - yield BNode(value="cb%s"%self._canonicalize(term)) - else: - yield term - - def _canonicalize(self, term, done=False): - return self.hashfunc(tuple(sorted(self._vhashtriples(term, done), - key=_hetero_tuple_key))) - - def _vhashtriples(self, term, done): - for triple in self.graph: - if term in triple: - yield tuple(self._vhashtriple(triple, term, done)) - - def _vhashtriple(self, triple, target_term, done): - for i, term in enumerate(triple): - if not isinstance(term, BNode): - yield term - elif done or (term == target_term): - yield i - else: - yield self._canonicalize(term, done=True) - -def _hetero_tuple_key(x): - "Sort like Python 2 - by name of type, then by value. Expects tuples." - return tuple((type(a).__name__, a) for a in x) - - -def to_isomorphic(graph): - if isinstance(graph, IsomorphicGraph): - return graph - return IsomorphicGraph(store=graph.store) - - -def isomorphic(graph1, graph2): - """ - Compare graph for equality. Uses an algorithm to compute unique hashes - which takes bnodes into account. - - Examples:: - - >>> g1 = Graph().parse(format='n3', data=''' - ... @prefix : <http://example.org/ns#> . - ... <http://example.org> :rel <http://example.org/a> . - ... <http://example.org> :rel <http://example.org/b> . - ... <http://example.org> :rel [ :label "A bnode." ] . - ... ''') - >>> g2 = Graph().parse(format='n3', data=''' - ... @prefix ns: <http://example.org/ns#> . - ... <http://example.org> ns:rel [ ns:label "A bnode." ] . - ... <http://example.org> ns:rel <http://example.org/b>, - ... <http://example.org/a> . - ... ''') - >>> isomorphic(g1, g2) - True - - >>> g3 = Graph().parse(format='n3', data=''' - ... @prefix : <http://example.org/ns#> . - ... <http://example.org> :rel <http://example.org/a> . - ... <http://example.org> :rel <http://example.org/b> . - ... <http://example.org> :rel <http://example.org/c> . - ... ''') - >>> isomorphic(g1, g3) - False - """ - return _TripleCanonicalizer(graph1).to_hash() == _TripleCanonicalizer(graph2).to_hash() - - -def to_canonical_graph(g1): - """ - Creates a canonical, read-only graph where all bnode id:s are based on - deterministical MD5 checksums, correlated with the graph contents. - """ - graph = Graph() - graph += _TripleCanonicalizer(g1, _md5_hash).canonical_triples() - return ReadOnlyGraphAggregate([graph]) - - -def graph_diff(g1, g2): - """ - Returns three sets of triples: "in both", "in first" and "in second". - """ - # bnodes have deterministic values in canonical graphs: - cg1 = to_canonical_graph(g1) - cg2 = to_canonical_graph(g2) - in_both = cg1*cg2 - in_first = cg1-cg2 - in_second = cg2-cg1 - return (in_both, in_first, in_second) - - -def _md5_hash(t): - h = md - for i in t: - if isinstance(i, tuple): - h.update(_md5_hash(i).encode('ascii')) - else: - h.update(unicode(i).encode("utf8")) - return h.hexdigest() - - -_MOCK_BNODE = BNode() - -def similar(g1, g2): - """ - Checks if the two graphs are "similar", by comparing sorted triples where - all bnodes have been replaced by a singular mock bnode (the - ``_MOCK_BNODE``). - - This is a much cheaper, but less reliable, alternative to the comparison - algorithm in ``isomorphic``. - """ - return all(t1 == t2 for (t1, t2) in _squashed_graphs_triples(g1, g2)) - -def _squashed_graphs_triples(g1, g2): - for (t1, t2) in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))): - yield t1, t2 - -def _squash_graph(graph): - return (_squash_bnodes(triple) for triple in graph) - -def _squash_bnodes(triple): - return tuple((isinstance(t, BNode) and _MOCK_BNODE) or t for t in triple) - - |