from collections import Counter
from rdflib import Graph, RDF, BNode, URIRef, Namespace, ConjunctiveGraph, Literal
from rdflib.compare import to_isomorphic, to_canonical_graph

import rdflib
from rdflib.plugins.memory import IOMemory

from six import text_type
from io import StringIO


def get_digest_value(rdf, mimetype):
    """Parse ``rdf`` and return the digest of its isomorphic graph.

    :param rdf: text of an RDF document; it is wrapped in a ``StringIO``
        before being handed to the parser.
    :param mimetype: passed through as the parser ``format``
        (the callers in this file use ``"text/turtle"``).
    :returns: whatever ``graph_digest`` of the isomorphic graph returns.

    The statistics dict filled in by ``graph_digest`` is printed so that it
    shows up in the output of a failing test.
    """
    graph = Graph()
    # ``parse`` is the supported entry point; ``Graph.load`` is deprecated
    # (and removed in newer rdflib releases) and takes the same arguments.
    graph.parse(StringIO(rdf), format=mimetype)
    stats = {}
    ig = to_isomorphic(graph)
    result = ig.graph_digest(stats)
    print(stats)
    return result


def _position_count_signature(graph):
    """Return sorted node-occurrence counts per triple position.

    The result is a list of three sorted lists: how often each distinct node
    occurs as subject, as predicate and as object. Node identities are
    dropped, so relabelling blank nodes leaves the signature unchanged,
    whereas merging two distinct nodes into one changes it.
    """
    # One counter each for the subject, predicate and object position.
    pos_counts = Counter(), Counter(), Counter()
    for triple in graph:
        for i, node in enumerate(triple):
            # Key on the node, not on the triple: triples are unique within
            # a graph, so counting them would always yield all-ones and the
            # comparison would degenerate into a length check.
            pos_counts[i][node] += 1
    return [sorted(c.values()) for c in pos_counts]


def negative_graph_match_test():
    '''Test of FRIR identifiers against tricky RDF graphs with blank nodes.

    Generator-style test: yields ``(fn, rdf1, rdf2, identical)`` where
    ``identical`` states whether both Turtle documents must produce the
    same digest.
    '''
    testInputs = [
        [text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         [ :label "Same" ].
         '''),
         text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         [ :label "Same" ],
         [ :label "Same" ].
         '''),
         False
         ],
        [text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         <http://example.org/a>.
         '''),
         text_type('''@prefix : <http://example.org/ns#> .
     <http://example.org> :rel
         <http://example.org/a>,
         <http://example.org/a>.
         '''),
         True
         ],
        [text_type('''@prefix : <http://example.org/ns#> .
     :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
                                              [ :related [ :related :linear_two_step_symmatry_end]].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     :linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
                                              [ :related [ :related :linear_two_step_symmatry_end]].'''),
         True
         ],
        [text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ];
          ].'''),
         False
         ],
        # Original note (truncated in the source): this test fails because
        # the algorithm purposefully breaks the symmetry of symmetric ...
        [text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         True
         ],
        [text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :label "foo";
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:a :rel [
         :rel [
         :rel [
         :rel [
           :rel _:a;
          ];
          ];
          ];
          ].'''),
         False
         ],
        [text_type('''@prefix : <http://example.org/ns#> .
     _:0001 :rel _:0003, _:0004.
     _:0002 :rel _:0005, _:0006.
     _:0003 :rel _:0001, _:0007, _:0010.
     _:0004 :rel _:0001, _:0009, _:0008.
     _:0005 :rel _:0002, _:0007, _:0009.
     _:0006 :rel _:0002, _:0008, _:0010.
     _:0007 :rel _:0003, _:0005, _:0009.
     _:0008 :rel _:0004, _:0006, _:0010.
     _:0009 :rel _:0004, _:0005, _:0007.
     _:0010 :rel _:0003, _:0006, _:0008.
     '''),
         text_type('''@prefix : <http://example.org/ns#> .
     _:0001 :rel _:0003, _:0004.
     _:0002 :rel _:0005, _:0006.
     _:0003 :rel _:0001, _:0007, _:0010.
     _:0008 :rel _:0004, _:0006, _:0010.
     _:0009 :rel _:0004, _:0005, _:0007.
     _:0010 :rel _:0003, _:0006, _:0008.
     _:0004 :rel _:0001, _:0009, _:0008.
     _:0005 :rel _:0002, _:0007, _:0009.
     _:0006 :rel _:0002, _:0008, _:0010.
     _:0007 :rel _:0003, _:0005, _:0009.
     '''),
         True
         ],
    ]

    def fn(rdf1, rdf2, identical):
        # Digest both documents and check that (in)equality of the digests
        # matches the expectation; inputs and digests are printed so they
        # are visible when the assertion fails.
        digest1 = get_digest_value(rdf1, "text/turtle")
        digest2 = get_digest_value(rdf2, "text/turtle")
        print(rdf1)
        print(digest1)
        print(rdf2)
        print(digest2)
        assert (digest1 == digest2) == identical

    for inputs in testInputs:
        yield fn, inputs[0], inputs[1], inputs[2]


def test_issue494_collapsing_bnodes():
    """Test for https://github.com/RDFLib/rdflib/issues/494 collapsing BNodes"""
    g = Graph()
    g += [
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['predicate'],
         BNode('vcb3')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['subject'],
         BNode('vcb2')),
        (BNode('Na1a8fbcf755f41c1b5728f326be50994'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['object'],
         URIRef(u'target')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['predicate'],
         BNode('vcb0')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Na713b02f320d409c806ff0190db324f4'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['object'],
         BNode('vr0KcS4')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['predicate'],
         BNode('vrby3JV')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Ndb804ba690a64b3dbb9063c68d5e3550'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['predicate'],
         BNode('vcb5')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['subject'],
         URIRef(u'target')),
        (BNode('Ndfc47fb1cd2d4382bcb8d5eb7835a636'),
         RDF['type'],
         RDF['Statement']),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['object'],
         URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['predicate'],
         BNode('vcb4')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['subject'],
         URIRef(u'source')),
        (BNode('Nec6864ef180843838aa9805bac835c98'),
         RDF['type'],
         RDF['Statement']),
    ]

    # Out-degree (number of triples with the node as subject) of every node,
    # largest first, before and after canonicalization.
    g_node_degs = sorted([
        len(list(g.triples([node, None, None])))
        for node in g.all_nodes()
    ], reverse=True)

    cg = to_canonical_graph(g)
    cg_node_degs = sorted([
        len(list(cg.triples([node, None, None])))
        for node in cg.all_nodes()
    ], reverse=True)

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'
    assert g_node_degs == cg_node_degs, \
        'canonicalization changed node degrees'

    # Occurrence counts of nodes in subject, predicate and object position
    # must be unaffected by canonicalization.
    assert _position_count_signature(g) == _position_count_signature(cg), \
        'canonicalization changed node position counts'


def test_issue682_signing_named_graphs():
    """Digest of a conjunctive graph must differ from one of its named graphs.

    Two named graphs share one store; the conjunctive graph over that store
    holds more triples than the "mary" graph alone, so their isomorphic
    forms must keep their respective sizes and produce different digests.
    """
    ns = Namespace("http://love.com#")

    mary = BNode()
    john = URIRef("http://love.com/lovers/john#")

    cmary = URIRef("http://love.com/lovers/mary#")
    cjohn = URIRef("http://love.com/lovers/john#")

    store = IOMemory()

    g = ConjunctiveGraph(store=store)
    g.bind("love", ns)

    gmary = Graph(store=store, identifier=cmary)

    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))

    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    ig = to_isomorphic(g)
    igmary = to_isomorphic(gmary)

    assert len(igmary) == len(gmary)
    assert len(ig) == len(g)
    assert len(igmary) < len(ig)
    assert ig.graph_digest() != igmary.graph_digest()


def test_issue725_collapsing_bnodes_2():
    """Canonicalization must not collapse the blank nodes of this graph.

    Builds five reified statements that share some blank nodes and checks
    that the canonical graph keeps the number of triples, nodes and
    statements as well as the per-position node occurrence counts.
    """
    g = Graph()
    g += [
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v2')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N0a76d42406b84fe4b8029d0a7fa04244'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N2f62af5936b94a8eb4b1e4bfa8e11d95'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v5')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v4')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N5ae541f93e1d4e5880450b1bdceb6404'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         URIRef(u'urn:gp_learner:fixed_var:source')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v0')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('N86ac7ca781f546ae939b8963895f672e'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#object'),
         BNode('v1')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate'),
         BNode('v3')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#subject'),
         URIRef(u'urn:gp_learner:fixed_var:target')),
        (BNode('Nac82b883ca3849b5ab6820b7ac15e490'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
         URIRef(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement'))
    ]

    # Turtle rendering of the graph above, kept for reference only; the test
    # does not parse it. To build ``g`` from it instead, use:
    #     g = Graph()
    #     g.parse(data=turtle, format='turtle')
    turtle = '''
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix xml: <http://www.w3.org/XML/1998/namespace> .
    @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    [] a rdf:Statement ;
        rdf:object [ ] ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object _:v1 ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object [ ] ;
        rdf:predicate [ ] ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object <urn:gp_learner:fixed_var:source> ;
        rdf:predicate _:v0 ;
        rdf:subject <urn:gp_learner:fixed_var:target> .

    [] a rdf:Statement ;
        rdf:object _:v1 ;
        rdf:predicate [ ] ;
        rdf:subject <urn:gp_learner:fixed_var:target> .'''

    # First pass: canonicalize while collecting statistics.
    stats = {}
    cg = rdflib.compare.to_canonical_graph(g, stats=stats)

    assert (len(g.all_nodes()) == len(cg.all_nodes()))

    # Second pass: canonicalize without a stats dict and run the full checks.
    cg = to_canonical_graph(g)
    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(list(g.subjects(RDF['type'], RDF['Statement']))) == \
        len(list(cg.subjects(RDF['type'], RDF['Statement']))), \
        'canonicalization changed number of statements'

    # Occurrence counts of nodes in subject, predicate and object position
    # must be unaffected by canonicalization.
    assert _position_count_signature(g) == _position_count_signature(cg), \
        'canonicalization changed node position counts'