1from collections import Counter
2from rdflib import Graph, RDF, BNode, URIRef, Namespace, ConjunctiveGraph, Literal
3from rdflib.compare import to_isomorphic, to_canonical_graph
4
5import rdflib
6from rdflib.plugins.memory import IOMemory
7
8from six import text_type
9from io import StringIO
10
11
def get_digest_value(rdf, mimetype):
    """Parse *rdf* and return the digest of its isomorphic graph.

    :param rdf: serialized RDF document as text.
    :param mimetype: parser format handed to ``Graph.parse`` (callers in this
        file pass ``"text/turtle"``).
    :returns: whatever ``graph_digest`` of the isomorphic graph returns.

    The statistics dictionary filled in by ``graph_digest`` is printed so it
    shows up in the captured output of a failing test.
    """
    graph = Graph()
    # ``Graph.parse`` is the supported entry point; ``Graph.load`` was only a
    # legacy alias for it and is not available in newer rdflib releases.
    graph.parse(StringIO(rdf), format=mimetype)
    stats = {}
    ig = to_isomorphic(graph)
    result = ig.graph_digest(stats)
    print(stats)
    return result
20
21
def negative_graph_match_test():
    '''Test of FRIR identifiers against tricky RDF graphs with blank nodes.

    Generator-style test: for every case it yields the checker function
    followed by its three arguments (first Turtle document, second Turtle
    document, and whether the two graph digests are expected to be equal).
    '''
    def fn(first_rdf, second_rdf, expect_equal):
        # Digests are computed first so their statistics are printed before
        # the documents themselves, then both inputs and digests are echoed
        # to make a failing case easy to diagnose.
        first_digest = get_digest_value(first_rdf, "text/turtle")
        second_digest = get_digest_value(second_rdf, "text/turtle")
        print(first_rdf)
        print(first_digest)
        print(second_rdf)
        print(second_digest)
        digests_match = first_digest == second_digest
        assert digests_match == expect_equal

    cases = (
        # One blank node versus two indistinguishable blank nodes.
        (text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         [ :label "Same" ].
         '''),
         text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         [ :label "Same" ],
         [ :label "Same" ].
         '''),
         False),
        # A repeated triple with an IRI object collapses to a single triple.
        (text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         <http://example.org/a>.
         '''),
         text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         <http://example.org/a>,
         <http://example.org/a>.
         '''),
         True),
        # Two parallel blank-node chains, compared against themselves.
        (text_type('''@prefix : <http://example.org/ns#> .
     :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
                                              [ :related [ :related :linear_two_step_symmatry_end]].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
                                              [ :related [ :related :linear_two_step_symmatry_end]].'''),
         True),
        # Blank-node cycles of different lengths.
        (text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ];
          ].'''),
         False),
        # NOTE(review): an earlier note here claimed this case fails because
        # the algorithm purposefully breaks the symmetry of symmetric
        # structures; that note was cut off mid-sentence -- confirm whether
        # it still applies.
        (text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         True),
        # Same cycle, but one node in the first document carries a label.
        (text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :label "foo";
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         False),
        # Identical statements listed in a different order.
        (text_type('''@prefix : <http://example.org/ns#> .
     _:0001 :rel _:0003, _:0004.
     _:0002 :rel _:0005, _:0006.
     _:0003 :rel _:0001, _:0007, _:0010.
     _:0004 :rel _:0001, _:0009, _:0008.
     _:0005 :rel _:0002, _:0007, _:0009.
     _:0006 :rel _:0002, _:0008, _:0010.
     _:0007 :rel _:0003, _:0005, _:0009.
     _:0008 :rel _:0004, _:0006, _:0010.
     _:0009 :rel _:0004, _:0005, _:0007.
     _:0010 :rel _:0003, _:0006, _:0008.
     '''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:0001 :rel _:0003, _:0004.
     _:0002 :rel _:0005, _:0006.
     _:0003 :rel _:0001, _:0007, _:0010.
     _:0008 :rel _:0004, _:0006, _:0010.
     _:0009 :rel _:0004, _:0005, _:0007.
     _:0010 :rel _:0003, _:0006, _:0008.
     _:0004 :rel _:0001, _:0009, _:0008.
     _:0005 :rel _:0002, _:0007, _:0009.
     _:0006 :rel _:0002, _:0008, _:0010.
     _:0007 :rel _:0003, _:0005, _:0009.
     '''),
         True),
    )

    for rdf1, rdf2, identical in cases:
        yield fn, rdf1, rdf2, identical
163
164
def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes.

    Builds five reified ``rdf:Statement`` resources whose subjects are blank
    nodes, canonicalizes the graph and checks that the number of triples,
    nodes and statements, the per-node out-degrees and the per-position node
    frequencies are all unchanged by canonicalization.
    """
    def out_degrees(graph):
        # Number of triples each node is the subject of, largest first.
        return sorted(
            (len(list(graph.triples((node, None, None))))
             for node in graph.all_nodes()),
            reverse=True)

    def position_count_signature(graph):
        # For each triple position (subject, predicate, object) count how
        # often every node occurs there. Only the sorted counts are kept, so
        # the signature does not depend on blank-node labels.
        counts = Counter(), Counter(), Counter()
        for triple in graph:
            for position, node in enumerate(triple):
                # Key by the node: keying by the whole triple would make
                # every count 1 and the comparison meaningless.
                counts[position][node] += 1
        return [sorted(c.values()) for c in counts]

    g = Graph()
    g += [
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['predicate'],
         BNode('vcb3')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['subject'],
         BNode('vcb2')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['object'],
         URIRef(u'target')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['predicate'],
         BNode('vcb0')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['object'],
         BNode('vr0KcS4')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['predicate'],
         BNode('vrby3JV')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['predicate'],
         BNode('vcb5')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['subject'],
         URIRef(u'target')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['predicate'],
         BNode('vcb4')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['type'],
         RDF['Statement']),
    ]

    cg = to_canonical_graph(g)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'
    assert out_degrees(g) == out_degrees(cg), \
        'canonicalization changed node degrees'
    assert position_count_signature(g) == position_count_signature(cg), \
        'canonicalization changed node position counts'
280
281
def test_issue682_signing_named_graphs():
    """Check isomorphic wrappers of named graphs that share one store.

    Two named graphs are created on the same store as a conjunctive graph.
    The isomorphic form of the single named graph must have the same length
    as that graph, be smaller than the isomorphic form of the conjunctive
    graph, and produce a different digest.
    """
    love = Namespace("http://love.com#")

    john_node = URIRef("http://love.com/lovers/john#")
    mary_node = BNode()

    mary_graph_id = URIRef("http://love.com/lovers/mary#")
    john_graph_id = URIRef("http://love.com/lovers/john#")

    # Every graph below is backed by this single store.
    shared_store = IOMemory()

    conjunctive = ConjunctiveGraph(store=shared_store)
    conjunctive.bind("love", love)

    mary_graph = Graph(store=shared_store, identifier=mary_graph_id)
    for triple in (
        (mary_node, love['hasName'], Literal("Mary")),
        (mary_node, love['loves'], john_node),
    ):
        mary_graph.add(triple)

    john_graph = Graph(store=shared_store, identifier=john_graph_id)
    john_graph.add((john_node, love['hasName'], Literal("John")))

    iso_all = to_isomorphic(conjunctive)
    iso_mary = to_isomorphic(mary_graph)

    assert len(iso_mary) == len(mary_graph)
    assert len(iso_all) == len(conjunctive)
    assert len(iso_mary) < len(iso_all)
    assert iso_all.graph_digest() != iso_mary.graph_digest()
311
312
def test_issue725_collapsing_bnodes_2():
    """Check that canonicalization does not collapse distinct blank nodes.

    The graph holds five reified ``rdf:Statement`` resources that all have
    ``urn:gp_learner:fixed_var:target`` as ``rdf:subject``. The blank node
    ``v0`` is the ``rdf:predicate`` of three of them and ``v1`` is the
    ``rdf:object`` of two of them; the remaining blank nodes occur once.
    After canonicalization the number of triples, nodes and statements and
    the per-position node frequencies must be unchanged.
    """
    def position_count_signature(graph):
        # For each triple position (subject, predicate, object) count how
        # often every node occurs there. Only the sorted counts are kept, so
        # the signature does not depend on blank-node labels.
        counts = Counter(), Counter(), Counter()
        for triple in graph:
            for position, node in enumerate(triple):
                # Key by the node: keying by the whole triple would make
                # every count 1 and the comparison meaningless.
                counts[position][node] += 1
        return [sorted(c.values()) for c in counts]

    g = Graph()
    g += [
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v2')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v5')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v4')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         URIRef(u'urn:gp_learner:fixed_var:source')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v3')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement'))
    ]

    # Canonicalize once; the stats dictionary is passed so the
    # statistics-collecting code path is exercised as well.
    stats = {}
    cg = rdflib.compare.to_canonical_graph(g, stats=stats)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'
    assert position_count_signature(g) == position_count_signature(cg), \
        'canonicalization changed node position counts'
466