# -*- coding: utf-8 -*-
# MolMod is a collection of molecular modelling tools for python.
# Copyright (C) 2007 - 2019 Toon Verstraelen <Toon.Verstraelen@UGent.be>, Center
# for Molecular Modeling (CMM), Ghent University, Ghent, Belgium; all rights
# reserved unless otherwise stated.
#
# This file is part of MolMod.
#
# MolMod is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# MolMod is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
"""Graph data structure, analysis and pattern matching.

   Some distinctive features include:

   * Iterating over all shortest paths between two vertices (Dijkstra). See
     http://en.wikipedia.org/wiki/Dijkstra's_algorithm for more info.
   * Iterating over vertices or edges using the Breadth First convention. See
     http://en.wikipedia.org/wiki/Breadth-first_search for more info.
   * The all pairs shortest path matrix (Floyd-Warshall). See
     http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm for more info.
   * Symmetry analysis of graphs (automorphisms). The Graph class can generate
     a list of permutations between vertices that map the graph onto itself.
     This can be used to generate (and test) all possible geometric symmetries
     in a molecule. See http://en.wikipedia.org/wiki/Graph_automorphism for
     more info.
   * Scanning a graph for patterns. The GraphSearch is a generic class that
     can scan a graph for certain patterns, e.g. given pattern_graphs, strong
     rings, isomorphisms, automorphisms, ... The pattern_graph can deal with
     (multiple sets of) additional conditions that must be satisfied, such as
     "give me all dihedral angles where the central atoms are carbons" without
     duplicates.

   The central class in this module is 'Graph'. It caches most of the analysis
   results, which implies that the graph structure can not be changed once the
   object is created. If you feel the need to do this, just construct a new
   graph object.
"""


from __future__ import print_function, division

import sys
from builtins import range
import copy

import numpy as np

from molmod.utils import cached, ReadOnly, ReadOnlyAttribute


__all__ = [
    "GraphError", "Graph", "OneToOne", "Match", "Pattern",
    "CriteriaSet", "Anything", "CritOr", "CritAnd", "CritXor", "CritNot",
    "CustomPattern", "EqualPattern", "RingPattern", "GraphSearch",
]


class GraphError(Exception):
    """Raised when something goes wrong in one of the graph algorithms"""
    pass
class Graph(ReadOnly):
    """An undirected graph, where edges have equal weight

    The edges attribute is the most elementary and is always available. All
    other attributes are optional.

    Graphs are meant to be immutable objects. Once created they are not
    supposed to be modified. If you want an adapted graph, create a new
    object. The main reason is that all graph analysis work is cached in this
    object. When the edges change, the cache becomes invalid and should be
    erased. The latter is not supported as it is easier to create just a new
    graph object with the modified edges.

    If you want to associate data with vertices or edges, add custom
    dictionary or list attributes to the graph object. This is the way to
    work with attributes that are not applicable to all vertices or edges:

    >>> graph.vertex_property = {vertex1: blablabla, vertex2: blablabla, ...}
    >>> graph.edge_property = {frozenset([vertex1, vertex2]): blablabla, ..}

    If a certain property applies to all vertices or edges, it is sometimes
    more practical to work with lists or ``numpy`` arrays that have the same
    ordering as the vertices or the edges:

    >>> # atom numbers of ethene
    >>> graph.vertex_property = np.array([6, 6, 1, 1, 1, 1], int)
    >>> # bond orders of ethene
    >>> graph.edge_property = np.array([2, 1, 1, 1, 1], int)
    """
    edges = ReadOnlyAttribute(tuple, none=False, doc="the incidence list")
    num_vertices = ReadOnlyAttribute(int, none=False, doc="the number of vertices")

    def __init__(self, edges, num_vertices=None):
        """
        Arguments:
         | ``edges``  --  ``tuple(frozenset([vertex1, vertex2]), ...)``
         | ``num_vertices``  --  number of vertices

        vertex1 to vertexN must be integers from 0 to N-1. If vertices above
        the highest vertex value are not connected by edges, use the
        num_vertices argument to tell what the total number of vertices is.

        If the edges argument does not have the correct format, it will be
        converted.
        """
        # Coerce each edge into a frozenset of two distinct non-negative
        # integers so that lookups and comparisons are canonical.
        tmp = []
        for edge in edges:
            if len(edge) != 2:
                raise TypeError("The edges must be an iterable with 2 elements")
            i, j = edge
            i = int(i)
            j = int(j)
            if i == j:
                raise ValueError("An edge must contain two different values.")
            if i < 0 or j < 0:
                # zero is a valid vertex index; only negative values are wrong
                raise TypeError("The edges must contain non-negative integers.")
            tmp.append(frozenset([i, j]))
        edges = tuple(tmp)

        if len(edges) == 0:
            real_num_vertices = 0
        else:
            real_num_vertices = max(max(a, b) for a, b in edges)+1
        if num_vertices is not None:
            if not isinstance(num_vertices, int):
                raise TypeError("The optional argument num_vertices must be an "
                                "integer when given.")
            if num_vertices < real_num_vertices:
                raise ValueError("num_vertices must be equal or larger to the "
                                 "number of vertices deduced from the edge list.")
            real_num_vertices = num_vertices

        self.edges = edges
        self.num_vertices = real_num_vertices

    num_edges = property(lambda self: len(self.edges),
        doc="*Read-only attribute:* the number of edges in the graph.")

    def __mul__(self, repeat):
        """Construct a graph that repeats this graph a number of times

        Arguments:
         | ``repeat``  --  The number of repetitions.
        """
        if not isinstance(repeat, int):
            raise TypeError("Can only multiply a graph with an integer")
        # Each copy gets its vertex indexes shifted by a multiple of
        # num_vertices so the copies are disjoint.
        new_edges = []
        for i in range(repeat):
            for vertex1, vertex2 in self.edges:
                new_edges.append(frozenset([
                    vertex1+i*self.num_vertices,
                    vertex2+i*self.num_vertices
                ]))
        return Graph(new_edges, self.num_vertices*repeat)

    __rmul__ = __mul__

    def __str__(self):
        return " ".join("%i-%i" % tuple(sorted([i, j])) for i, j in self.edges)

    # functions that should be implemented by derived classes

    def get_vertex_string(self, i):
        """Returns a string that fully characterizes the nature of the vertex

        In case of an ordinary graph, all vertices are the same.
        """
        return ""
183 """ 184 return "" 185 186 def get_edge_string(self, i): 187 """Returns a string that fully characterizes the nature of an edge 188 189 In case of an ordinary graph, all edges are the same. 190 """ 191 return "" 192 193 # cached attributes: 194 195 @cached 196 def edge_index(self): 197 """A map to look up the index of a edge""" 198 return dict((edge, index) for index, edge in enumerate(self.edges)) 199 200 @cached 201 def neighbors(self): 202 """A dictionary with neighbors 203 204 The dictionary will have the following form: 205 ``{vertexX: (vertexY1, vertexY2, ...), ...}`` 206 This means that vertexX and vertexY1 are connected etc. This also 207 implies that the following elements are part of the dictionary: 208 ``{vertexY1: (vertexX, ...), vertexY2: (vertexX, ...), ...}``. 209 """ 210 neighbors = dict( 211 (vertex, []) for vertex 212 in range(self.num_vertices) 213 ) 214 for a, b in self.edges: 215 neighbors[a].append(b) 216 neighbors[b].append(a) 217 # turn lists into frozensets 218 neighbors = dict((key, frozenset(val)) for key, val in neighbors.items()) 219 return neighbors 220 221 @cached 222 def distances(self): 223 """The matrix with the all-pairs shortest path lenghts""" 224 from molmod.ext import graphs_floyd_warshall 225 distances = np.zeros((self.num_vertices,)*2, dtype=int) 226 #distances[:] = -1 # set all -1, which is just a very big integer 227 #distances.ravel()[::len(distances)+1] = 0 # set diagonal to zero 228 for i, j in self.edges: # set edges to one 229 distances[i, j] = 1 230 distances[j, i] = 1 231 graphs_floyd_warshall(distances) 232 return distances 233 234 @cached 235 def max_distance(self): 236 """The maximum value in the distances matrix.""" 237 if self.distances.shape[0] == 0: 238 return 0 239 else: 240 return self.distances.max() 241 242 @cached 243 def central_vertices(self): 244 """Vertices that have the lowest maximum distance to any other vertex""" 245 max_distances = self.distances.max(0) 246 max_distances_min = 
max_distances[max_distances > 0].min() 247 return (max_distances == max_distances_min).nonzero()[0] 248 249 @cached 250 def central_vertex(self): 251 """The vertex that has the lowest maximum distance to any other vertex 252 253 This definition does not lead to a unique solution. One arbitrary 254 solution is selected. 255 """ 256 return self.central_vertices[0] 257 258 @cached 259 def independent_vertices(self): 260 """Lists of vertices that are only interconnected within each list 261 262 This means that there is no path from a vertex in one list to a 263 vertex in another list. In case of a molecular graph, this would 264 yield the atoms that belong to individual molecules. 265 """ 266 candidates = set(range(self.num_vertices)) 267 268 result = [] 269 while len(candidates) > 0: 270 pivot = candidates.pop() 271 group = [ 272 vertex for vertex, distance 273 in self.iter_breadth_first(pivot) 274 ] 275 candidates.difference_update(group) 276 277 # this sort makes sure that the order of the vertices is respected 278 group.sort() 279 result.append(group) 280 return result 281 282 @cached 283 def fingerprint(self): 284 """A total graph fingerprint 285 286 The result is invariant under permutation of the vertex indexes. The 287 chance that two different (molecular) graphs yield the same 288 fingerprint is small but not zero. (See unit tests.)""" 289 if self.num_vertices == 0: 290 return np.zeros(20, np.ubyte) 291 else: 292 return sum(self.vertex_fingerprints) 293 294 @cached 295 def vertex_fingerprints(self): 296 """A fingerprint for each vertex 297 298 The result is invariant under permutation of the vertex indexes. 299 Vertices that are symmetrically equivalent will get the same 300 fingerprint, e.g. the hydrogens in methane would get the same 301 fingerprint. 
302 """ 303 return self.get_vertex_fingerprints( 304 [self.get_vertex_string(i) for i in range(self.num_vertices)], 305 [self.get_edge_string(i) for i in range(self.num_edges)], 306 ) 307 308 @cached 309 def equivalent_vertices(self): 310 """A dictionary with symmetrically equivalent vertices.""" 311 level1 = {} 312 for i, row in enumerate(self.vertex_fingerprints): 313 key = row.tobytes() 314 l = level1.get(key) 315 if l is None: 316 l = set([i]) 317 level1[key] = l 318 else: 319 l.add(i) 320 level2 = {} 321 for key, vertices in level1.items(): 322 for vertex in vertices: 323 level2[vertex] = vertices 324 return level2 325 326 @cached 327 def symmetries(self): 328 """Graph symmetries (permutations) that map the graph onto itself.""" 329 330 symmetry_cycles = set([]) 331 symmetries = set([]) 332 for match in GraphSearch(EqualPattern(self))(self): 333 match.cycles = match.get_closed_cycles() 334 if match.cycles in symmetry_cycles: 335 raise RuntimeError("Duplicates in EqualMatch") 336 symmetry_cycles.add(match.cycles) 337 symmetries.add(match) 338 return symmetries 339 340 @cached 341 def symmetry_cycles(self): 342 """The cycle representations of the graph symmetries""" 343 result = set([]) 344 for symmetry in self.symmetries: 345 result.add(symmetry.cycles) 346 return result 347 348 @cached 349 def canonical_order(self): 350 """The vertices in a canonical or normalized order. 351 352 This routine will return a list of vertices in an order that does not 353 depend on the initial order, but only depends on the connectivity and 354 the return values of the function self.get_vertex_string. 355 356 Only the vertices that are involved in edges will be included. The 357 result can be given as first argument to self.get_subgraph, with 358 reduce=True as second argument. This will return a complete canonical 359 graph. 360 361 The routine is designed not to use symmetry relations that are 362 obtained with the GraphSearch routine. 
We also tried to create an 363 ordering that feels like natural, i.e. starting in the center and 364 pushing vertices with few equivalents to the front. If necessary, the 365 nature of the vertices and their bonds to atoms closer to the center 366 will also play a role, but only as a last resort. 367 """ 368 # A) find an appropriate starting vertex. 369 # Here we take a central vertex that has a minimal number of symmetrical 370 # equivalents, 'the highest atom number', and the highest fingerprint. 371 # Note that the symmetrical equivalents are computed from the vertex 372 # fingerprints, i.e. without the GraphSearch. 373 starting_vertex = max( 374 ( 375 -len(self.equivalent_vertices[vertex]), 376 self.get_vertex_string(vertex), 377 self.vertex_fingerprints[vertex].tobytes(), 378 vertex 379 ) for vertex in self.central_vertices 380 )[-1] 381 382 # B) sort all vertices based on 383 # 1) distance from central vertex 384 # 2) number of equivalent vertices 385 # 3) vertex string, (higher atom numbers come first) 386 # 4) fingerprint 387 # 5) vertex index 388 # The last field is only included to collect the result of the sort. 389 # The fingerprint on itself would be sufficient, but the three first are 390 # there to have a naturally appealing result. 391 l = [ 392 [ 393 -distance, 394 -len(self.equivalent_vertices[vertex]), 395 self.get_vertex_string(vertex), 396 self.vertex_fingerprints[vertex].tobytes(), 397 vertex 398 ] for vertex, distance in self.iter_breadth_first(starting_vertex) 399 if len(self.neighbors[vertex]) > 0 400 ] 401 l.sort(reverse=True) 402 403 # C) The order of some vertices is still not completely set. e.g. 404 # consider the case of allene. The four hydrogen atoms are equivalent, 405 # but one can have two different orders: make geminiles consecutive or 406 # don't. It is more trikcy than one would think at first sight. In the 407 # case of allene, geminility could easily solve the problem. 
Consider a 408 # big flat rotationally symmetric molecule (order 2). The first five 409 # shells are order 4 and one would just give a random order to four 410 # segemnts in the first shell. Only when one reaches the outer part that 411 # has order two, it turns out that the arbitrary choices in the inner 412 # shell play a role. So it does not help to look at relations with 413 # vertices at inner or current shells only. One has to consider the 414 # whole picture. (unit testing reveals troubles like these) 415 416 # I need some sleep now. The code below checks for potential fuzz and 417 # will raise an error if the ordering is not fully determined yet. One 418 # day, I'll need this code more than I do now, and I'll fix things up. 419 # I know how to do this, but I don't care enough right now. 420 # -- Toon 421 for i in range(1, len(l)): 422 if l[i][:-1] == l[i-1][:-1]: 423 raise NotImplementedError 424 425 # D) Return only the vertex indexes. 426 return [record[-1] for record in l] 427 428 # other usefull graph functions 429 430 def iter_breadth_first(self, start=None, do_paths=False, do_duplicates=False): 431 """Iterate over the vertices with the breadth first algorithm. 432 433 See http://en.wikipedia.org/wiki/Breadth-first_search for more info. 434 If not start vertex is given, the central vertex is taken. 435 436 By default, the distance to the starting vertex is also computed. If 437 the path to the starting vertex should be computed instead, set path 438 to True. 439 440 When duplicate is True, then vertices that can be reached through 441 different paths of equal length, will be iterated twice. This 442 typically only makes sense when path==True. 
443 """ 444 if start is None: 445 start = self.central_vertex 446 else: 447 try: 448 start = int(start) 449 except ValueError: 450 raise TypeError("First argument (start) must be an integer.") 451 if start < 0 or start >= self.num_vertices: 452 raise ValueError("start must be in the range [0, %i[" % 453 self.num_vertices) 454 from collections import deque 455 work = np.zeros(self.num_vertices, int) 456 work[:] = -1 457 work[start] = 0 458 if do_paths: 459 result = (start, 0, (start, )) 460 else: 461 result = (start, 0) 462 yield result 463 todo = deque([result]) 464 while len(todo) > 0: 465 if do_paths: 466 parent, parent_length, parent_path = todo.popleft() 467 else: 468 parent, parent_length = todo.popleft() 469 current_length = parent_length + 1 470 for current in self.neighbors[parent]: 471 visited = work[current] 472 if visited == -1 or (do_duplicates and visited == current_length): 473 work[current] = current_length 474 if do_paths: 475 current_path = parent_path + (current, ) 476 result = (current, current_length, current_path) 477 else: 478 result = (current, current_length) 479 #print "iter_breadth_first", result 480 yield result 481 todo.append(result) 482 483 def iter_shortest_paths(self, a, b): 484 """Iterate over all the shortest paths between vertex a and b.""" 485 max_len = None 486 ibf = self.iter_breadth_first(a, do_paths=True, do_duplicates=True) 487 for vertex, length, path in ibf: 488 if max_len is not None: 489 if length > max_len: 490 return 491 if vertex == b: 492 max_len = length 493 yield path 494 495 def iter_breadth_first_edges(self, start=None): 496 """Iterate over the edges with the breadth first convention. 497 498 We need this for the pattern matching algorithms, but a quick look at 499 Wikipedia did not result in a known and named algorithm. 
500 501 The edges are yielded one by one, together with the distance of the 502 edge from the starting vertex and a flag that indicates whether the 503 yielded edge connects two vertices that are at the same distance from 504 the starting vertex. If that flag is False, the distance from the 505 starting vertex to edge[0] is equal to the distance variable and the 506 distance from edge[1] to the starting vertex is equal to distance+1. 507 One item has the following format: ((i, j), distance, flag) 508 """ 509 if start is None: 510 start = self.central_vertex 511 else: 512 try: 513 start = int(start) 514 except ValueError: 515 raise TypeError("First argument (start) must be an integer.") 516 if start < 0 or start >= self.num_vertices: 517 raise ValueError("start must be in the range [0, %i[" % 518 self.num_vertices) 519 from collections import deque 520 work = np.zeros(self.num_vertices, int) 521 work[:] = -1 522 work[start] = 0 523 todo = deque([start]) 524 while len(todo) > 0: 525 parent = todo.popleft() 526 distance = work[parent] 527 for current in self.neighbors[parent]: 528 if work[current] == -1: 529 yield (parent, current), distance, False 530 work[current] = distance+1 531 todo.append(current) 532 elif work[current] == distance and current > parent: 533 # second equation in elif avoids duplicates 534 yield (parent, current), distance, True 535 elif work[current] == distance+1: 536 yield (parent, current), distance, False 537 538 def get_subgraph(self, subvertices, normalize=False): 539 """Constructs a subgraph of the current graph 540 541 Arguments: 542 | ``subvertices`` -- The vertices that should be retained. 543 | ``normalize`` -- Whether or not the vertices should renumbered and 544 reduced to the given set of subvertices. When True, also the 545 edges are sorted. It the end, this means that new order of the 546 edges does not depend on the original order, but only on the 547 order of the argument subvertices. 548 This option is False by default. 
When False, only edges will be 549 discarded, but the retained data remain unchanged. Also the 550 parameter num_vertices is not affected. 551 552 The returned graph will have an attribute ``old_edge_indexes`` that 553 relates the positions of the new and the old edges as follows:: 554 555 >>> self.edges[result._old_edge_indexes[i]] = result.edges[i] 556 557 In derived classes, the following should be supported:: 558 559 >>> self.edge_property[result._old_edge_indexes[i]] = result.edge_property[i] 560 561 When ``normalize==True``, also the vertices are affected and the 562 derived classes should make sure that the following works:: 563 564 >>> self.vertex_property[result._old_vertex_indexes[i]] = result.vertex_property[i] 565 566 The attribute ``old_vertex_indexes`` is only constructed when 567 ``normalize==True``. 568 """ 569 if normalize: 570 revorder = dict((j, i) for i, j in enumerate(subvertices)) 571 new_edges = [] 572 old_edge_indexes = [] 573 for counter, (i, j) in enumerate(self.edges): 574 new_i = revorder.get(i) 575 if new_i is None: 576 continue 577 new_j = revorder.get(j) 578 if new_j is None: 579 continue 580 new_edges.append((new_i, new_j)) 581 old_edge_indexes.append(counter) 582 # sort the edges 583 order = list(range(len(new_edges))) 584 # argsort in pure python 585 order.sort( key=(lambda i: tuple(sorted(new_edges[i]))) ) 586 new_edges = [new_edges[i] for i in order] 587 old_edge_indexes = [old_edge_indexes[i] for i in order] 588 589 result = Graph(new_edges, num_vertices=len(subvertices)) 590 result._old_vertex_indexes = np.array(subvertices, dtype=int) 591 #result.new_vertex_indexes = revorder 592 result._old_edge_indexes = np.array(old_edge_indexes, dtype=int) 593 else: 594 subvertices = set(subvertices) 595 old_edge_indexes = np.array([ 596 i for i, edge in enumerate(self.edges) 597 if edge.issubset(subvertices) 598 ], dtype=int) 599 new_edges = tuple(self.edges[i] for i in old_edge_indexes) 600 result = Graph(new_edges, self.num_vertices) 
601 result._old_edge_indexes = old_edge_indexes 602 # no need for old and new vertex_indexes because they remain the 603 # same. 604 return result 605 606 def get_vertex_fingerprints(self, vertex_strings, edge_strings, num_iter=None): 607 """Return an array with fingerprints for each vertex""" 608 import hashlib 609 def str2array(x): 610 """convert a hash string to a numpy array of bytes""" 611 if len(x) == 0: 612 return np.zeros(0, np.ubyte) 613 elif sys.version_info[0] == 2: 614 return np.frombuffer(x, np.ubyte) 615 else: 616 return np.frombuffer(x.encode(), np.ubyte) 617 hashrow = lambda x: np.frombuffer(hashlib.sha1(x.data).digest(), np.ubyte) 618 # initialization 619 result = np.zeros((self.num_vertices, 20), np.ubyte) 620 for i in range(self.num_vertices): 621 result[i] = hashrow(str2array(vertex_strings[i])) 622 for i in range(self.num_edges): 623 a, b = self.edges[i] 624 tmp = hashrow(str2array(edge_strings[i])) 625 result[a] += tmp 626 result[b] += tmp 627 work = result.copy() 628 # iterations 629 if num_iter is None: 630 num_iter = self.max_distance 631 for i in range(num_iter): 632 for a, b in self.edges: 633 work[a] += result[b] 634 work[b] += result[a] 635 #for a in xrange(self.num_vertices): 636 # for b in xrange(self.num_vertices): 637 # work[a] += hashrow(result[b]*self.distances[a, b]) 638 for a in range(self.num_vertices): 639 result[a] = hashrow(work[a]) 640 return result 641 642 def get_halfs(self, vertex1, vertex2): 643 """Split the graph in two halfs by cutting the edge: vertex1-vertex2 644 645 If this is not possible (due to loops connecting both ends), a 646 GraphError is raised. 647 648 Returns the vertices in both halfs. 
649 """ 650 def grow(origin, other): 651 frontier = set(self.neighbors[origin]) 652 frontier.discard(other) 653 result = set([origin]) 654 while len(frontier) > 0: 655 pivot = frontier.pop() 656 if pivot == other: 657 raise GraphError("The graph can not be separated in two halfs " 658 "by disconnecting vertex1 and vertex2.") 659 pivot_neighbors = set(self.neighbors[pivot]) 660 pivot_neighbors -= result 661 frontier |= pivot_neighbors 662 result.add(pivot) 663 return result 664 665 vertex1_part = grow(vertex1, vertex2) 666 vertex2_part = grow(vertex2, vertex1) 667 return vertex1_part, vertex2_part 668 669 def get_part(self, vertex_in, vertices_border): 670 """List all vertices that are connected to vertex_in, but are not 671 included in or 'behind' vertices_border. 672 """ 673 vertices_new = set(self.neighbors[vertex_in]) 674 vertices_part = set([vertex_in]) 675 676 while len(vertices_new) > 0: 677 pivot = vertices_new.pop() 678 if pivot in vertices_border: 679 continue 680 vertices_part.add(pivot) 681 pivot_neighbors = set(self.neighbors[pivot]) 682 pivot_neighbors -= vertices_part 683 vertices_new |= pivot_neighbors 684 685 return vertices_part 686 687 def get_halfs_double(self, vertex_a1, vertex_b1, vertex_a2, vertex_b2): 688 """Compute the two parts separated by ``(vertex_a1, vertex_b1)`` and ``(vertex_a2, vertex_b2)`` 689 690 Raise a GraphError when ``(vertex_a1, vertex_b1)`` and 691 ``(vertex_a2, vertex_b2)`` do not separate the graph in two 692 disconnected parts. The edges must be neighbors. If not a GraphError 693 is raised. The for vertices must not coincide or a GraphError is 694 raised. 695 696 Returns the vertices of the two halfs and the four 'hinge' vertices 697 in the correct order, i.e. both ``vertex_a1`` and ``vertex_a2`` are 698 in the first half and both ``vertex_b1`` and ``vertex_b2`` are in the 699 second half. 
700 """ 701 if vertex_a1 not in self.neighbors[vertex_b1]: 702 raise GraphError("vertex_a1 must be a neighbor of vertex_b1.") 703 if vertex_a2 not in self.neighbors[vertex_b2]: 704 raise GraphError("vertex_a2 must be a neighbor of vertex_b2.") 705 706 # find vertex_a_part (and possibly switch vertex_a2, vertex_b2) 707 vertex_a_new = set(self.neighbors[vertex_a1]) 708 vertex_a_new.discard(vertex_b1) 709 if vertex_a1 == vertex_b2: 710 # we now that we have to swap vertex_a2 and vertex_b2. The algo 711 # below will fail otherwise in this 'exotic' case. 712 vertex_a2, vertex_b2 = vertex_b2, vertex_a2 713 #vertex_a_new.discard(vertex_a2) # in case there is overlap 714 if vertex_a1 == vertex_a2: 715 vertex_a_new.discard(vertex_b2) # in case there is overlap 716 vertex_a_part = set([vertex_a1]) 717 718 touched = False # True if (the switched) vertex_a2 has been reached. 719 while len(vertex_a_new) > 0: 720 pivot = vertex_a_new.pop() 721 if pivot == vertex_b1: 722 raise GraphError("The graph can not be separated in two halfs. " 723 "vertex_b1 reached by vertex_a1.") 724 vertex_a_part.add(pivot) 725 # create a new set that we can modify 726 pivot_neighbors = set(self.neighbors[pivot]) 727 pivot_neighbors -= vertex_a_part 728 if pivot == vertex_a2 or pivot == vertex_b2: 729 if pivot == vertex_b2: 730 if touched: 731 raise GraphError("The graph can not be separated in " 732 "two halfs. vertex_b2 reached by " 733 "vertex_a1.") 734 else: 735 # put them in the correct order 736 vertex_a2, vertex_b2 = vertex_b2, vertex_a2 737 pivot_neighbors.discard(vertex_b2) 738 touched = True 739 vertex_a_new |= pivot_neighbors 740 741 if vertex_a2 not in vertex_a_part: 742 raise GraphError("The graph can not be separated in two halfs. " 743 "vertex_a1 can not reach vertex_a2 trough " 744 "vertex_a_part") 745 746 # find vertex_b_part: easy, is just the rest ... 747 #vertex_b_part = set(xrange(self.num_vertices)) - vertex_a_part 748 749 # ... 
but we also want that there is a path in vertex_b_part from 750 # vertex_b1 to vertex_b2 751 if vertex_b1 == vertex_b2: 752 closed = True 753 else: 754 vertex_b_new = set(self.neighbors[vertex_b1]) 755 vertex_b_new.discard(vertex_a1) 756 vertex_b_part = set([vertex_b1]) 757 758 closed = False 759 while len(vertex_b_new) > 0: 760 pivot = vertex_b_new.pop() 761 if pivot == vertex_b2: 762 closed = True 763 break 764 pivot_neighbors = set(self.neighbors[pivot]) 765 pivot_neighbors -= vertex_b_part 766 vertex_b_new |= pivot_neighbors 767 vertex_b_part.add(pivot) 768 769 if not closed: 770 raise GraphError("The graph can not be separated in two halfs. " 771 "vertex_b1 can not reach vertex_b2 trough " 772 "vertex_b_part") 773 774 # finaly compute the real vertex_b_part, the former loop might break 775 # early for efficiency. 776 vertex_b_part = set(range(self.num_vertices)) - vertex_a_part 777 778 # done! 779 return vertex_a_part, vertex_b_part, \ 780 (vertex_a1, vertex_b1, vertex_a2, vertex_b2) 781 782 def full_match(self, other): 783 """Find the mapping between vertex indexes in self and other. 784 785 This also works on disconnected graphs. Derived classes should just 786 implement get_vertex_string and get_edge_string to make this method 787 aware of the different nature of certain vertices. In case molecules, 788 this would make the algorithm sensitive to atom numbers etc. 789 """ 790 # we need normalize subgraphs because these graphs are used as patterns. 
791 graphs0 = [ 792 self.get_subgraph(group, normalize=True) 793 for group in self.independent_vertices 794 ] 795 graphs1 = [ 796 other.get_subgraph(group) 797 for group in other.independent_vertices 798 ] 799 800 if len(graphs0) != len(graphs1): 801 return 802 803 matches = [] 804 805 for graph0 in graphs0: 806 pattern = EqualPattern(graph0) 807 found_match = False 808 for i, graph1 in enumerate(graphs1): 809 local_matches = list(GraphSearch(pattern)(graph1, one_match=True)) 810 if len(local_matches) == 1: 811 match = local_matches[0] 812 # we need to restore the relation between the normalized 813 # graph0 and its original indexes 814 old_to_new = OneToOne(( 815 (j, i) for i, j 816 in enumerate(graph0._old_vertex_indexes) 817 )) 818 matches.append(match * old_to_new) 819 del graphs1[i] 820 found_match = True 821 break 822 if not found_match: 823 return 824 825 result = OneToOne() 826 for match in matches: 827 result.add_relations(match.forward.items()) 828 return result 829 830 831# Pattern matching 832 833 834class OneToOne(object): 835 """Implements a discrete bijection between source and destination elements 836 837 The implementation is based on a consistent set of forward and reverse 838 relations, stored in dictionaries. 
class OneToOne(object):
    """Implements a discrete bijection between source and destination elements

    The implementation is based on a consistent set of forward and reverse
    relations, stored in dictionaries.
    """

    def __init__(self, relations=None):
        """
        Argument:
         | ``relations``  --  initial relations for the bijection
        """
        self.forward = {}
        self.reverse = {}
        if relations is not None:
            self.add_relations(relations)

    def __len__(self):
        return len(self.forward)

    def __str__(self):
        return "|" + "".join(
            " %s -> %s |" % (source, destination)
            for source, destination in self.forward.items()
        )

    def __mul__(self, other):
        """Return the result of the 'after' operator."""
        composed = OneToOne()
        for source, intermediate in other.forward.items():
            destination = self.forward[intermediate]
            composed.forward[source] = destination
            composed.reverse[destination] = source
        return composed

    def add_relation(self, source, destination):
        """Add a new relation to the bijection"""
        # Guard against inconsistent or duplicate relations before storing.
        if self.in_sources(source):
            if self.forward[source] != destination:
                raise ValueError("Source is already in use. Destination does "
                                 "not match.")
            raise ValueError("Source-Destination relation already exists.")
        if self.in_destinations(destination):
            raise ValueError("Destination is already in use. Source does not "
                             "match.")
        self.forward[source] = destination
        self.reverse[destination] = source

    def add_relations(self, relations):
        """Add multiple relations to a bijection"""
        for source, destination in relations:
            self.add_relation(source, destination)

    def get_destination(self, source):
        """Get the end point of a relation that start with 'source'"""
        return self.forward[source]

    def get_source(self, destination):
        """Get the starting point of a relation that ends with 'destination'"""
        return self.reverse[destination]

    def in_destinations(self, destination):
        """Test if the given destination is present"""
        return destination in self.reverse

    def in_sources(self, source):
        """Test if the given source is present"""
        return source in self.forward

    def inverse(self):
        """Returns the inverse bijection."""
        result = self.__class__()
        result.forward = dict(self.reverse)
        result.reverse = dict(self.forward)
        return result
928 """ 929 result = [] 930 #print "Match.get_new_edges self.previous_ends1", self.previous_ends1 931 for vertex in self.previous_ends1: 932 for neighbor in subject_graph.neighbors[vertex]: 933 if neighbor not in self.reverse: 934 result.append((vertex, neighbor)) 935 return result 936 937 def copy_with_new_relations(self, new_relations): 938 """Create a new match object extended with new relations""" 939 result = self.__class__(self.forward.items()) 940 result.add_relations(new_relations.items()) 941 result.previous_ends1 = set(new_relations.values()) 942 return result 943 944 945class Pattern(object): 946 """Base class for a pattern in a graph. 947 948 Note the following conventions: 949 950 * A pattern can always be represented by a graph (or a set of graphs) 951 and some additional conditions. This graph is the so called 'PATTERN 952 GRAPH'. For technical reasons, this pattern graph is not always 953 constructed explicitly. Variables related to this graph often get 954 suffix '0'. Note that a pattern graph is always fully connected. 955 * The graph in which we search for the pattern, is called the 'SUBJECT 956 GRAPH'. Variables related to this graph often get suffix '1'. 957 """ 958 959 # This means that matching vertices must not have equal number of neighbors: 960 sub = True 961 MatchClass = Match 962 963 def iter_initial_relations(self, subject_graph): 964 """Iterate over all initial relations to start a Graph Search 965 966 The function iterates of single relations between a pattern vertex 967 and a subject vertex. In practice it is sufficient to select one 968 vertex in the pattern and then associate it with each (reasonable) 969 vertex in the subject graph. 970 """ 971 raise NotImplementedError 972 973 def get_new_edges(self, level): 974 """Get new edges from the pattern graph for the graph search algorithm 975 976 The level argument denotes the distance of the new edges from the 977 starting vertex in the pattern graph. 
978 """ 979 raise NotImplementedError 980 981 def check_symmetry(self, new_relations, current_match, next_match): 982 """Off all symmetric ``new_relations``, only allow one""" 983 return True 984 985 def compare(self, vertex0, vertex1, subject_graph): 986 """Test if ``vertex0`` and ``vertex1`` can be equal 987 988 False positives are allowed, but the less false positives, the more 989 efficient the :class:`GraphSearch` will be. 990 """ 991 return True 992 993 def check_next_match(self, match, new_relations, subject_graph, one_match): 994 """Does this match object make sense for the current pattern 995 996 Return False if some symmetry or other considerations are not 997 satisfied. The checks in this function are typically only possible by 998 considering the whole instead of looking just at a few 999 vertices/edges/relations. 1000 """ 1001 return True 1002 1003 def complete(self, match, subject_graph): 1004 """Returns ``True`` if no more additional relations are required""" 1005 return True 1006 1007 def iter_final_matches(self, match, subject_graph, one_match): 1008 """Just return the original match 1009 1010 Derived classes can specialize here to make efficient use of 1011 symmetries 1012 """ 1013 yield match 1014 1015 1016class CriteriaSet(object): 1017 """A set of criteria that can be associated with a custum pattern.""" 1018 1019 def __init__(self, vertex_criteria=None, edge_criteria=None, **kwargs): 1020 """ 1021 Arguments: 1022 | ``vertex_criteria`` -- a dictionary with criteria for the 1023 vertices, ``key=vertex_index``, 1024 ``value=criterion`` 1025 | ``edge_criteria`` -- a dictionary with criteria for the edges 1026 ``key=edge_index``, ``value=criterion`` 1027 1028 Any other keyword argument will be assigned as attribute to matches 1029 that fulfill the criteria of this set. 
1030 """ 1031 if vertex_criteria is None: 1032 self.vertex_criteria = {} 1033 else: 1034 self.vertex_criteria = vertex_criteria 1035 if edge_criteria is None: 1036 self.edge_criteria = {} 1037 else: 1038 self.edge_criteria = edge_criteria 1039 self.info = kwargs 1040 1041 def test_match(self, match, pattern_graph, subject_graph): 1042 """Test if a match satisfies the criteria""" 1043 for vertex0, c in self.vertex_criteria.items(): 1044 vertex1 = match.forward[vertex0] 1045 if not c(vertex1, subject_graph): 1046 return False 1047 for edge0_index, c in self.edge_criteria.items(): 1048 vertex0a, vertex0b = pattern_graph.edges[edge0_index] 1049 edge1_index = subject_graph.edge_index[frozenset([ 1050 match.forward[vertex0a], 1051 match.forward[vertex0b], 1052 ])] 1053 if not c(edge1_index, subject_graph): 1054 return False 1055 return True 1056 1057# few basic example criteria 1058 1059class Anything(object): 1060 """A criterion that always returns True""" 1061 def __call__(self, index, subject_graph): 1062 """Always returns True""" 1063 return True 1064 1065 1066class CritOr(object): 1067 """OR Operator for criteria objects""" 1068 1069 def __init__(self, *criteria): 1070 """ 1071 Argument: 1072 | ``criteria`` -- a list of criteria to apply the OR operation to. 
1073 """ 1074 self.criteria = criteria 1075 1076 def __call__(self, index, graph): 1077 """Evaluates all the criteria and applies an OR opartion 1078 1079 Arguments: 1080 | ``index`` -- the index of the vertex/edge on which the criterion 1081 is applied 1082 | ``graph`` -- the graph on which the criterion is tested 1083 """ 1084 for c in self.criteria: 1085 if c(index, graph): 1086 return True 1087 return False 1088 1089 1090class CritAnd(object): 1091 """AND Operator for criteria objects""" 1092 1093 def __init__(self, *criteria): 1094 """ 1095 Argument: 1096 | ``criteria`` -- a list of criteria to apply the AND operation to 1097 """ 1098 self.criteria = criteria 1099 1100 def __call__(self, index, graph): 1101 """Evaluates all the criteria and applies an AND opartion 1102 1103 Arguments: 1104 | ``index`` -- the index of the vertex/edge on which the criterion 1105 is applied 1106 | ``graph`` -- the graph on which the criterion is tested 1107 """ 1108 for c in self.criteria: 1109 if not c(index, graph): 1110 return False 1111 return True 1112 1113 1114class CritXor(object): 1115 """XOR Operator for criteria objects""" 1116 1117 def __init__(self, *criteria): 1118 """ 1119 Argument: 1120 | ``criteria`` -- a list of criteria to apply the XOR operation to. 1121 """ 1122 self.criteria = criteria 1123 1124 def __call__(self, index, graph): 1125 """Evaluates all the criteria and applies a generalized XOR opartion 1126 1127 Arguments: 1128 | ``index`` -- the index of the vertex/edge on which the criterion 1129 is applied 1130 | ``graph`` -- the graph on which the criterion is tested 1131 1132 when the XOR operation is applied to more than two criteria, True 1133 is only returned when an odd number of criteria return True. 
1134 """ 1135 count = 0 1136 for c in self.criteria: 1137 if c(index, graph): 1138 count += 1 1139 return (count % 2) == 1 1140 1141 1142class CritNot(object): 1143 """Inverion of another criterion""" 1144 1145 def __init__(self, criterion): 1146 """ 1147 Argument: 1148 | ``criterion`` -- another criterion object 1149 """ 1150 self.criterion = criterion 1151 1152 def __call__(self, index, graph): 1153 """Evaluates all the criterion and applies an inversion opartion 1154 1155 Arguments: 1156 | ``index`` -- the index of the vertex/edge on which the criterion is 1157 applied 1158 | ``graph`` -- the graph on which the criterion is tested 1159 """ 1160 return not self.criterion(index, graph) 1161 1162 1163# pattern and match stuff 1164 1165 1166class CustomPattern(Pattern): 1167 """A pattern based on a given pattern graph 1168 1169 The pattern graph can be complemented with additional criteria for the 1170 vertices and edges. Additionally the effective symmetry of the pattern 1171 graph can be reduced by tagging the vertices in the pattern graph with 1172 different labels. 1173 """ 1174 def __init__(self, pattern_graph, criteria_sets=None, vertex_tags=None, start_vertex=None): 1175 """ 1176 Arguments: 1177 | ``pattern_graph`` -- the pattern that has to be found in the subject 1178 graph. 1179 | ``criteria_sets`` -- Criteria sets associate additional conditions 1180 with vertices and edges, and can also introduce 1181 global match conditions. 1182 | ``vertex_tags`` -- vertex tags can reduce the symmetry of the 1183 pattern_graph pattern. An example case where this is useful: 1184 Consider atoms 0, 1, 2 that are bonded in this order. We want to 1185 compute the distance from atom 2 to the line (0, 1). In this case 1186 the matches (0->a, 1->b, 2->c) and (0->c, 1->b, 2->a) correspond 1187 to different internal coordinates. We want the graph search to 1188 return the two solutions. In order to do this, set 1189 ``vertex_tags={0:0, 1:0, 2:1}``. 
This means that vertex 0 and 1 1190 are equivalent, but that vertex 2 has a different nature. In the 1191 case of a bending angle, only one match like (0->a, 1->b, 2->c) is 1192 sufficient and we do not want to reduce the symmetry of the 1193 ``pattern_graph``. In this case, one should not use vertex_tags at 1194 all. 1195 | ``start_vertex`` -- The first vertex in the pattern graph that is 1196 linked with a vertex in the subject graph. A wise choice can 1197 improve the performance of a graph search. If not given, the 1198 central vertex is take as start_vertex 1199 """ 1200 self.criteria_sets = criteria_sets 1201 if vertex_tags is None: 1202 vertex_tags = {} 1203 self.vertex_tags = vertex_tags 1204 # get the essential information from the pattern_graph: 1205 if start_vertex is None: 1206 self.start_vertex = pattern_graph.central_vertex 1207 else: 1208 self.start_vertex = start_vertex 1209 self._set_pattern_graph(pattern_graph) 1210 Pattern.__init__(self) 1211 1212 def _set_pattern_graph(self, pattern_graph): 1213 """Initialize the pattern_graph""" 1214 self.pattern_graph = pattern_graph 1215 self.level_edges = {} 1216 self.level_constraints = {} 1217 self.duplicate_checks = set([]) 1218 if pattern_graph is None: 1219 return 1220 if len(pattern_graph.independent_vertices) != 1: 1221 raise ValueError("A pattern_graph must not be a disconnected " 1222 "graph.") 1223 # A) the levels for the incremental pattern matching 1224 ibfe = self.pattern_graph.iter_breadth_first_edges(self.start_vertex) 1225 for edge, distance, constraint in ibfe: 1226 if constraint: 1227 l = self.level_constraints.setdefault(distance-1, []) 1228 else: 1229 l = self.level_edges.setdefault(distance, []) 1230 l.append(edge) 1231 #print "level_edges", self.level_edges 1232 #print "level_constraints", self.level_constraints 1233 # B) The comparisons the should be checked when one wants to avoid 1234 # symmetrically duplicate pattern matches 1235 if self.criteria_sets is not None: 1236 for cycles in 
pattern_graph.symmetry_cycles: 1237 if len(cycles) > 0: 1238 self.duplicate_checks.add((cycles[0][0], cycles[0][1])) 1239 1240 1241 def iter_initial_relations(self, subject_graph): 1242 """Iterate over all valid initial relations for a match""" 1243 vertex0 = self.start_vertex 1244 for vertex1 in range(subject_graph.num_vertices): 1245 if self.compare(vertex0, vertex1, subject_graph): 1246 yield vertex0, vertex1 1247 1248 def get_new_edges(self, level): 1249 """Get new edges from the pattern graph for the graph search algorithm 1250 1251 The level argument denotes the distance of the new edges from the 1252 starting vertex in the pattern graph. 1253 """ 1254 return ( 1255 self.level_edges.get(level, []), 1256 self.level_constraints.get(level, []) 1257 ) 1258 1259 def check_next_match(self, match, new_relations, subject_graph, one_match): 1260 """Check if the (onset for a) match can be a valid""" 1261 # only returns true for ecaxtly one set of new_relations from all the 1262 # ones that are symmetrically equivalent 1263 if not (self.criteria_sets is None or one_match): 1264 for check in self.duplicate_checks: 1265 vertex_a = new_relations.get(check[0]) 1266 vertex_b = new_relations.get(check[1]) 1267 if vertex_a is None and vertex_b is None: 1268 continue # if this pair is completely absent in the new 1269 # relations, it is either completely in the match or it 1270 # is to be matched. So it is either already checked for 1271 # symmetry duplicates, or it will be check in future. 1272 if vertex_a is None: 1273 # maybe vertex_a is in the match and vertex_b is the only 1274 # one in the new relations. try to get vertex_a from the 1275 # match. 1276 vertex_a = match.forward.get(check[0]) 1277 if vertex_a is None: 1278 # ok, vertex_a is to be found, don't care about it right 1279 # now. it will be checked in future calls. 1280 continue 1281 elif vertex_b is None: 1282 # maybe vertex_b is in the match and vertex_a is the only 1283 # one in the new relations. 
try to get vertex_b from the 1284 # match. 1285 vertex_b = match.forward.get(check[1]) 1286 if vertex_b is None: 1287 # ok, vertex_b is to be found, don't care about it right 1288 # now. it will be checked in future calls. 1289 continue 1290 if vertex_a > vertex_b: 1291 # Why does this work? The answer is not so easy to explain, 1292 # and certainly not easy to find. if vertex_a > vertex_b, it 1293 # means that there is a symmetry operation that leads to 1294 # an equivalent match where vertex_b < vertex_a. The latter 1295 # match is preferred for as much pairs (vertex_a, vertex_b) 1296 # as possible without rejecting all possible matches. The 1297 # real difficulty is to construct a proper list of 1298 # (vertex_a, vertex_b) pairs that will reject all but one 1299 # matches. I conjecture that this list contains all the 1300 # first two vertices from each normalized symmetry cycle of 1301 # the pattern graph. We need a math guy to do the proof. -- Toon 1302 return False 1303 return True 1304 return True 1305 1306 def complete(self, match, subject_graph): 1307 """Return True of the match is complete""" 1308 return len(match) == self.pattern_graph.num_vertices 1309 1310 def iter_final_matches(self, canonical_match, subject_graph, one_match): 1311 """Given a match, iterate over all related equivalent matches 1312 1313 When criteria sets are defined, the iterator runs over all symmetric 1314 equivalent matches that fulfill one of the criteria sets. When not 1315 criteria sets are defined, the iterator only yields the input match. 
1316 """ 1317 if self.criteria_sets is None or one_match: 1318 yield canonical_match 1319 else: 1320 for criteria_set in self.criteria_sets: 1321 satisfied_match_tags = set([]) 1322 for symmetry in self.pattern_graph.symmetries: 1323 final_match = canonical_match * symmetry 1324 #print final_match 1325 if criteria_set.test_match(final_match, self.pattern_graph, subject_graph): 1326 match_tags = tuple( 1327 self.vertex_tags.get(symmetry.reverse[vertex0]) 1328 for vertex0 1329 in range(self.pattern_graph.num_vertices) 1330 ) 1331 if match_tags not in satisfied_match_tags: 1332 final_match.__dict__.update(criteria_set.info) 1333 yield final_match 1334 satisfied_match_tags.add(match_tags) 1335 1336 1337class EqualMatch(Match): 1338 """A Match object with specialized functions for the EqualPattern""" 1339 def get_closed_cycles(self): 1340 """Return the closed cycles corresponding to this permutation 1341 1342 The cycle will be normalized to facilitate the elimination of 1343 duplicates. The following is guaranteed: 1344 1345 1) If this permutation is represented by disconnected cycles, the 1346 cycles will be sorted by the lowest index they contain. 1347 2) Each cycle starts with its lowest index. (unique starting point) 1348 3) Singletons are discarded. 
(because they are boring) 1349 """ 1350 # A) construct all the cycles 1351 closed_cycles = [] 1352 todo = set(self.forward.keys()) 1353 if todo != set(self.forward.values()): 1354 raise GraphError("The subject and pattern graph must have the same " 1355 "numbering.") 1356 current_vertex = None 1357 while len(todo) > 0: 1358 if current_vertex == None: 1359 current_vertex = todo.pop() 1360 current_cycle = [] 1361 else: 1362 todo.discard(current_vertex) 1363 current_cycle.append(current_vertex) 1364 next_vertex = self.get_destination(current_vertex) 1365 if next_vertex == current_cycle[0]: 1366 if len(current_cycle) > 1: 1367 # bring the lowest element in front 1368 pivot = np.argmin(current_cycle) 1369 current_cycle = current_cycle[pivot:] + \ 1370 current_cycle[:pivot] 1371 closed_cycles.append(current_cycle) 1372 current_vertex = None 1373 else: 1374 current_vertex = next_vertex 1375 # B) normalize the cycle representation 1376 closed_cycles.sort() # a normal sort is sufficient because only the 1377 # first item of each cycle is considered 1378 1379 # transform the structure into a tuple of tuples 1380 closed_cycles = tuple(tuple(cycle) for cycle in closed_cycles) 1381 return closed_cycles 1382 1383 1384class EqualPattern(CustomPattern): 1385 """Like CustomPattern, but the pattern has the same size as the subject graph""" 1386 sub = False 1387 MatchClass = EqualMatch 1388 1389 def __init__(self, pattern_graph): 1390 """See :meth:`CustomPattern.__init__`""" 1391 # Don't allow criteria sets and vertex_tags. This limitation is due to 1392 # the compare method below. TODO: Is this a good idea? 
1393 CustomPattern.__init__(self, pattern_graph) 1394 1395 def iter_initial_relations(self, subject_graph): 1396 """Iterate over all valid initial relations for a match""" 1397 if self.pattern_graph.num_edges != subject_graph.num_edges: 1398 return # don't even try 1399 for pair in CustomPattern.iter_initial_relations(self, subject_graph): 1400 yield pair 1401 1402 def compare(self, vertex0, vertex1, subject_graph): 1403 """Returns true when the two vertices are of the same kind""" 1404 return ( 1405 self.pattern_graph.vertex_fingerprints[vertex0] == 1406 subject_graph.vertex_fingerprints[vertex1] 1407 ).all() 1408 1409 1410class RingPattern(Pattern): 1411 """A pattern that matches strong rings up to a given size""" 1412 1413 def __init__(self, max_size): 1414 """ 1415 Argument: 1416 | ``max_size`` -- the maximum number of vertices in a ring 1417 """ 1418 if max_size < 3: 1419 raise ValueError("Ring sizes must be at least 3.") 1420 self.max_size = max_size 1421 Pattern.__init__(self) 1422 1423 def iter_initial_relations(self, subject_graph): 1424 """Iterate over all valid initial relations for a match""" 1425 vertex0 = 0 1426 for vertex1 in range(subject_graph.num_vertices): 1427 yield vertex0, vertex1 1428 1429 def get_new_edges(self, level): 1430 """Get new edges from the pattern graph for the graph search algorithm 1431 1432 The level argument denotes the distance of the new edges from the 1433 starting vertex in the pattern graph. 
1434 """ 1435 if level == 0: 1436 edges0 = [(0, 1), (0, 2)] 1437 elif level >= (self.max_size-1)//2: 1438 edges0 = [] 1439 else: 1440 l2 = level*2 1441 edges0 = [(l2-1, l2+1), (l2, l2+2)] 1442 return edges0, [] 1443 1444 def check_next_match(self, match, new_relations, subject_graph, one_match): 1445 """Check if the (onset for a) match can be a valid (part of a) ring""" 1446 # avoid duplicate rings (order of traversal) 1447 if len(match) == 3: 1448 if match.forward[1] < match.forward[2]: 1449 #print "RingPattern.check_next_match: duplicate order", match.forward[1], match.forward[2] 1450 return False 1451 # avoid duplicate rings (starting point) 1452 for vertex1 in new_relations.values(): 1453 if vertex1 < match.forward[0]: 1454 #print "RingPattern.check_next_match: duplicate start", vertex1, match.forward[0] 1455 return False 1456 # can this ever become a strong ring? 1457 for vertex1 in new_relations.values(): 1458 paths = list(subject_graph.iter_shortest_paths(vertex1, match.forward[0])) 1459 if len(paths) != 1: 1460 #print "RingPattern.check_next_match: not strong 1" 1461 return False 1462 if len(paths[0]) != (len(match)+1)//2: 1463 #print "RingPattern.check_next_match: not strong 2" 1464 return False 1465 return True 1466 1467 def complete(self, match, subject_graph): 1468 """Check the completeness of a ring match""" 1469 size = len(match) 1470 # check whether we have an odd strong ring 1471 if match.forward[size-1] in subject_graph.neighbors[match.forward[size-2]]: 1472 # we have an odd closed cycle. check if this is a strong ring 1473 order = list(range(0, size, 2)) + list(range(1, size-1, 2))[::-1] 1474 ok = True 1475 for i in range(len(order)//2): 1476 # Count the number of paths between two opposite points in the 1477 # ring. Since the ring has an odd number of vertices, each 1478 # vertex has two semi-opposite vertices. 
1479 count = len(list(subject_graph.iter_shortest_paths( 1480 match.forward[order[i]], 1481 match.forward[order[(i+size//2)%size]] 1482 ))) 1483 if count > 1: 1484 ok = False 1485 break 1486 count = len(list(subject_graph.iter_shortest_paths( 1487 match.forward[order[i]], 1488 match.forward[order[(i+size//2+1)%size]] 1489 ))) 1490 if count > 1: 1491 ok = False 1492 break 1493 if ok: 1494 match.ring_vertices = tuple(match.forward[i] for i in order) 1495 #print "RingPattern.complete: found odd ring" 1496 return True 1497 #print "RingPattern.complete: no odd ring" 1498 # check whether we have an even strong ring 1499 paths = list(subject_graph.iter_shortest_paths( 1500 match.forward[size-1], 1501 match.forward[size-2] 1502 )) 1503 #print "RingPattern.complete: even paths", paths 1504 if (size > 3 and len(paths) == 1 and len(paths[0]) == 3) or \ 1505 (size == 3 and len(paths) == 2 and len(paths[0]) == 3): 1506 path = paths[0] 1507 if size == 3 and path[1] == match.forward[0]: 1508 path = paths[1] 1509 # we have an even closed cycle. 
check if this is a strong ring 1510 match.add_relation(size, path[1]) 1511 size += 1 1512 order = list(range(0, size, 2)) + list(range(size-1, 0, -2)) 1513 ok = True 1514 for i in range(len(order)//2): 1515 count = len(list(subject_graph.iter_shortest_paths( 1516 match.forward[order[i]], 1517 match.forward[order[(i+size//2)%size]] 1518 ))) 1519 if count != 2: 1520 ok = False 1521 break 1522 if ok: 1523 # also check if this does not violate the requirement for a 1524 # unique origin: 1525 if match.forward[size-1] < match.forward[0]: 1526 ok = False 1527 if not ok: 1528 vertex1 = match.forward[size-1] 1529 del match.forward[size-1] 1530 del match.reverse[vertex1] 1531 size -= 1 1532 #print "RingPattern.complete: no even ring" 1533 else: 1534 match.ring_vertices = tuple(match.forward[i] for i in order) 1535 #print "RingPattern.complete: found even ring" 1536 return ok 1537 #print "RingPattern.complete: not at all" 1538 return False 1539 1540 1541class GraphSearch(object): 1542 """An algorithm that searches for all matches of a pattern in a graph 1543 1544 Usage: 1545 1546 >>> gs = GraphSearch(pattern) 1547 >>> for match in gs(graph): 1548 ... print match.forward 1549 """ 1550 1551 def __init__(self, pattern, debug=False): 1552 """ 1553 Arguments: 1554 | ``pattern`` -- A Pattern instance, describing the pattern to 1555 look for 1556 | ``debug`` -- When true, debugging info is printed on screen 1557 [default=False] 1558 """ 1559 self.pattern = pattern 1560 self.debug = debug 1561 1562 def __call__(self, subject_graph, one_match=False): 1563 """Iterator over all matches of self.pattern in the given graph. 1564 1565 Arguments: 1566 | subject_graph -- The subject_graph in which the matches 1567 according to self.pattern have to be found. 1568 | one_match -- If True, only one match will be returned. This 1569 allows certain optimizations. 1570 """ 1571 # Matches are grown iteratively. 
1572 for vertex0, vertex1 in self.pattern.iter_initial_relations(subject_graph): 1573 init_match = self.pattern.MatchClass.from_first_relation(vertex0, vertex1) 1574 # init_match cotains only one source -> dest relation. starting from 1575 # this initial match, the function iter_matches extends the match 1576 # in all possible ways and yields the completed matches 1577 for canonical_match in self._iter_matches(init_match, subject_graph, one_match): 1578 # Some patterns my exclude symmetrically equivalent matches as 1579 # to aviod dupplicates. with such a 'canonical' solution, 1580 # the pattern is allowed to generate just those symmatrical 1581 # duplicates of interest. 1582 ifm = self.pattern.iter_final_matches(canonical_match, subject_graph, one_match) 1583 for final_match in ifm: 1584 self.print_debug("final_match: %s" % final_match) 1585 yield final_match 1586 if one_match: return 1587 1588 def print_debug(self, text, indent=0): 1589 """Only prints debug info on screen when self.debug == True.""" 1590 if self.debug: 1591 if indent > 0: 1592 print(" "*self.debug, text) 1593 self.debug += indent 1594 if indent <= 0: 1595 print(" "*self.debug, text) 1596 1597 def _iter_candidate_groups(self, init_match, edges0, edges1): 1598 """Divide the edges into groups""" 1599 # collect all end vertices0 and end vertices1 that belong to the same 1600 # group. 
1601 sources = {} 1602 for start_vertex0, end_vertex0 in edges0: 1603 l = sources.setdefault(start_vertex0, []) 1604 l.append(end_vertex0) 1605 dests = {} 1606 for start_vertex1, end_vertex1 in edges1: 1607 start_vertex0 = init_match.reverse[start_vertex1] 1608 l = dests.setdefault(start_vertex0, []) 1609 l.append(end_vertex1) 1610 for start_vertex0, end_vertices0 in sources.items(): 1611 end_vertices1 = dests.get(start_vertex0, []) 1612 yield end_vertices0, end_vertices1 1613 1614 1615 def _iter_new_relations(self, init_match, subject_graph, edges0, constraints0, edges1): 1616 """Given an onset for a match, iterate over all possible new key-value pairs""" 1617 # Count the number of unique edges0[i][1] values. This is also 1618 # the number of new relations. 1619 num_new_relations = len(set(j for i, j in edges0)) 1620 1621 def combine_small(relations, num): 1622 """iterate over all compatible combinations within one set of relations""" 1623 if len(relations) == 0: 1624 return 1625 for i, pivot in enumerate(relations): 1626 if num == 1: 1627 yield (pivot, ) 1628 else: 1629 compatible_relations = list( 1630 item for item in relations[:i] 1631 if pivot[0]!=item[0] and pivot[1]!=item[1] 1632 ) 1633 for tail in combine_small(compatible_relations, num-1): 1634 yield (pivot, ) + tail 1635 1636 # generate candidate relations 1637 candidate_relations = [] 1638 icg = self._iter_candidate_groups(init_match, edges0, edges1) 1639 for end_vertices0, end_vertices1 in icg: 1640 if len(end_vertices0) > len(end_vertices1): 1641 return # this can never work, the subject graph is 'too small' 1642 elif not self.pattern.sub and \ 1643 len(end_vertices0) != len(end_vertices1): 1644 return # an exact match is sought, this can never work 1645 l = [] 1646 for end_vertex0 in end_vertices0: 1647 for end_vertex1 in end_vertices1: 1648 if self.pattern.compare(end_vertex0, end_vertex1, subject_graph): 1649 l.append((end_vertex0, end_vertex1)) 1650 # len(end_vertices0) = the total number of 
relations that must be 1651 # made in this group 1652 if len(l) > 0: 1653 # turn l into a list of sets of internally compatible candidate 1654 # relations in this group 1655 l = list(combine_small(l, len(end_vertices0))) 1656 candidate_relations.append(l) 1657 if len(candidate_relations) == 0: 1658 return 1659 self.print_debug("candidate_relations: %s" % candidate_relations) 1660 1661 def combine_big(pos=0): 1662 """Iterate over all possible sets of relations""" 1663 # pos is an index in candidate_relations 1664 crs = candidate_relations[pos] 1665 if pos == len(candidate_relations)-1: 1666 for relations in crs: 1667 yield relations 1668 else: 1669 for tail in combine_big(pos+1): 1670 for relations in crs: 1671 yield relations + tail 1672 1673 # final loop 1674 for new_relations in combine_big(): 1675 new_relations = set(new_relations) 1676 self.print_debug("new_relations: %s" % (new_relations, )) 1677 # check the total number of new relations 1678 if len(new_relations) != num_new_relations: 1679 continue 1680 # check sanity of relations 1681 forward = dict(new_relations) 1682 if len(forward) != num_new_relations: 1683 continue 1684 reverse = dict((j, i) for i, j in new_relations) 1685 if len(reverse) != num_new_relations: 1686 continue 1687 # check the constraints 1688 for a0, b0 in constraints0: 1689 if forward[a0] not in subject_graph.neighbors[forward[b0]]: 1690 forward = None 1691 break 1692 if forward is None: 1693 continue 1694 yield forward 1695 1696 def _iter_matches(self, input_match, subject_graph, one_match, level=0): 1697 """Given an onset for a match, iterate over all completions of that match 1698 1699 This iterator works recursively. 
At each level the match is extended 1700 with a new set of relations based on vertices in the pattern graph 1701 that are at a distances 'level' from the starting vertex 1702 """ 1703 self.print_debug("ENTERING _ITER_MATCHES", 1) 1704 self.print_debug("input_match: %s" % input_match) 1705 # A) collect the new edges in the pattern graph and the subject graph 1706 # to extend the match. 1707 # 1708 # Note that the edges are ordered. edge[0] is always in the match. 1709 # edge[1] is never in the match. The constraints contain information 1710 # about the end points of edges0. It is a list of two-tuples where 1711 # (a, b) means that a and b must be connected. 1712 # 1713 # Second note: suffix 0 indicates the pattern graph and suffix 1 1714 # is used for the subject graph. 1715 edges0, constraints0 = self.pattern.get_new_edges(level) 1716 edges1 = input_match.get_new_edges(subject_graph) 1717 self.print_debug("edges0: %s" % edges0) 1718 self.print_debug("constraints0: %s" % constraints0) 1719 self.print_debug("edges1: %s" % edges1) 1720 1721 # B) iterate over the sets of new relations: [(vertex0[i], vertex1[j]), 1722 # ...] that contain all endpoints of edges0, that satisfy the 1723 # constraints0 and where (vertex0[i], vertex1[j]) only occurs if these 1724 # are end points of a edge0 and edge1 whose starting points are already 1725 # in init_match. These conditions are implemented in an iterator as to 1726 # separate concerns. This iterator also calls the routines that check 1727 # whether vertex1[j] also satisfies additional conditions inherent 1728 # vertex0[i]. 
1729 inr = self._iter_new_relations(input_match, subject_graph, edges0, 1730 constraints0, edges1) 1731 for new_relations in inr: 1732 # for each set of new_relations, construct a next_match and recurse 1733 next_match = input_match.copy_with_new_relations(new_relations) 1734 if not self.pattern.check_next_match(next_match, new_relations, subject_graph, one_match): 1735 continue 1736 if self.pattern.complete(next_match, subject_graph): 1737 yield next_match 1738 else: 1739 for match in self._iter_matches(next_match, subject_graph, one_match, level+1): 1740 yield match 1741 self.print_debug("LEAVING_ITER_MATCHES", -1) 1742