1# -*- coding: utf-8 -*-
2# MolMod is a collection of molecular modelling tools for python.
3# Copyright (C) 2007 - 2019 Toon Verstraelen <Toon.Verstraelen@UGent.be>, Center
4# for Molecular Modeling (CMM), Ghent University, Ghent, Belgium; all rights
5# reserved unless otherwise stated.
6#
7# This file is part of MolMod.
8#
9# MolMod is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 3
12# of the License, or (at your option) any later version.
13#
14# MolMod is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, see <http://www.gnu.org/licenses/>
21#
22# --
23#
24"""Graph data structure, analysis and pattern matching.
25
26   Some distinctive features include:
27
28   * Iterating over all shortest paths between two vertices (Dijkstra). See
29     http://en.wikipedia.org/wiki/Dijkstra's_algorithm for more info.
30   * Iterating over vertices or edges using the Breadth First convention. See
31     http://en.wikipedia.org/wiki/Breadth-first_search for more info.
32   * The all pairs shortest path matrix (Floyd-Warshall). See
33     http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm for more info.
34   * Symmetry analysis of graphs (automorphisms). The Graph class can generate a
35     list of permutations between vertices that map the graph onto itself. This
36     can be used to generate (and test) all possible geometric symmetries in a
37     molecule. See http://en.wikipedia.org/wiki/Graph_automorphism for more
38     info.
39   * Scanning a graph for patterns.
40     The GraphSearch is a generic class that can scan a graph for certain
41     patterns, e.g. given pattern_graphs, strong rings, isomorphisms,
42     automorphisms, ... The pattern_graph can deal with (multiple sets of)
43     additional conditions that must be satisfied, such as "give me all
44     dihedral angles where the central atoms are carbons" without duplicates.
45
46   The central class in this module is 'Graph'. It caches most of the analysis
47   results, which implies that the graph structure can not be changed once the
48   object is created. If you feel the need to do this, just construct a new
49   graph object.
50"""
51
52
53from __future__ import print_function, division
54
55import sys
56from builtins import range
57import copy
58
59import numpy as np
60
61from molmod.utils import cached, ReadOnly, ReadOnlyAttribute
62
63
# Public API of this module; names not listed here are implementation details.
__all__ = [
    "GraphError", "Graph", "OneToOne", "Match", "Pattern",
    "CriteriaSet", "Anything", "CritOr", "CritAnd", "CritXor", "CritNot",
    "CustomPattern", "EqualPattern", "RingPattern", "GraphSearch",
]
69
70
class GraphError(Exception):
    """Error raised when one of the graph algorithms cannot proceed."""
74
75
class Graph(ReadOnly):
    """An undirected graph, where edges have equal weight

       The edges attribute is the most elementary and is always available. All
       other attributes are optional.

       Graphs are meant to be immutable objects. Once created they are not
       supposed to be modified. If you want an adapted graph, create a new
       object. The main reason is that all graph analysis work is cached in this
       object. When the edges change, the cache becomes invalid and should be
       erased. The latter is not supported as it is easier to create just a new
       graph object with the modified edges.

       If you want to associate data with vertices or edges, add custom
       dictionary or list attributes to the graph object. This is the way to
       work attributes that are not applicable to all vertices or edges:

       >>> graph.vertex_property = {vertex1: blablabla, vertex2: blablabla, ...}
       >>> graph.edge_property = {frozenset([vertex1, vertex2]): blablabla, ..}

       If a certain property applies to all vertices or edges, it is sometimes
       more practical to work with lists or ``numpy`` arrays that have the same
       ordering as the vertices or the edges:

       >>> # atom numbers of ethene
       >>> graph.vertex_property = np.array([6, 6, 1, 1, 1, 1], int)
       >>> # bond orders of ethene
       >>> graph.edge_property = np.array([2, 1, 1, 1, 1], int)
    """
    # Type-checked attributes backed by the ReadOnlyAttribute descriptor from
    # molmod.utils (presumably write-once after __init__ -- verify against
    # molmod.utils if that matters to you).
    edges = ReadOnlyAttribute(tuple, none=False, doc="the incidence list")
    num_vertices = ReadOnlyAttribute(int, none=False, doc="the number of vertices")
107
108    def __init__(self, edges, num_vertices=None):
109        """
110           Arguments:
111            | ``edges`` -- ``tuple(frozenset([vertex1, vertex2]), ...)``
112            | ``num_vertices`` -- number of vertices
113
114           vertex1 to vertexN must be integers from 0 to N-1. If vertices above
115           the highest vertex value are not connected by edges, use the
116           num_vertices argument to tell what the total number of vertices is.
117
118           If the edges argument does not have the correct format, it will be
119           converted.
120        """
121
122        tmp = []
123        for edge in edges:
124            if len(edge) != 2:
125                raise TypeError("The edges must be a iterable with 2 elements")
126            i, j = edge
127            i = int(i)
128            j = int(j)
129            if i == j:
130                raise ValueError("A edge must contain two different values.")
131            if i < 0 or j < 0:
132                raise TypeError("The edges must contain positive integers.")
133            tmp.append(frozenset([i, j]))
134        edges = tuple(tmp)
135
136        if len(edges) == 0:
137            real_num_vertices = 0
138        else:
139            real_num_vertices = max(max(a, b) for a, b in edges)+1
140        if num_vertices is not None:
141            if not isinstance(num_vertices, int):
142                raise TypeError("The optional argument num_vertices must be an "
143                    "integer when given.")
144            if num_vertices < real_num_vertices:
145                raise ValueError("num_vertices must be equal or larger to the "
146                    "number of vertices deduced from the edge list.")
147            real_num_vertices = num_vertices
148
149        self.edges = edges
150        self.num_vertices = real_num_vertices
151
152    num_edges = property(lambda self: len(self.edges),
153        doc="*Read-only attribute:* the number of edges in the graph.")
154
155    def __mul__(self, repeat):
156        """Construct a graph that repeats this graph a number of times
157
158           Arguments:
159            | ``repeat`` -- The number of repetitions.
160        """
161        if not isinstance(repeat, int):
162            raise TypeError("Can only multiply a graph with an integer")
163        new_edges = []
164        for i in range(repeat):
165            for vertex1, vertex2 in self.edges:
166                new_edges.append(frozenset([
167                    vertex1+i*self.num_vertices,
168                    vertex2+i*self.num_vertices
169                ]))
170        return Graph(new_edges, self.num_vertices*repeat)
171
172    __rmul__ = __mul__
173
174    def __str__(self):
175        return " ".join("%i-%i" % tuple(sorted([i, j])) for i, j in self.edges)
176
177    # functions that should be implemented by derived classes
178
179    def get_vertex_string(self, i):
180        """Returns a string that fully characterizes the nature of the vertex
181
182           In case of an ordinary graph, all vertices are the same.
183        """
184        return ""
185
186    def get_edge_string(self, i):
187        """Returns a string that fully characterizes the nature of an edge
188
189           In case of an ordinary graph, all edges are the same.
190        """
191        return ""
192
193    # cached attributes:
194
195    @cached
196    def edge_index(self):
197        """A map to look up the index of a edge"""
198        return dict((edge, index) for index, edge in enumerate(self.edges))
199
200    @cached
201    def neighbors(self):
202        """A dictionary with neighbors
203
204           The dictionary will have the following form:
205           ``{vertexX: (vertexY1, vertexY2, ...), ...}``
206           This means that vertexX and vertexY1 are connected etc. This also
207           implies that the following elements are part of the dictionary:
208           ``{vertexY1: (vertexX, ...), vertexY2: (vertexX, ...), ...}``.
209        """
210        neighbors = dict(
211            (vertex, []) for vertex
212            in range(self.num_vertices)
213        )
214        for a, b in self.edges:
215            neighbors[a].append(b)
216            neighbors[b].append(a)
217        # turn lists into frozensets
218        neighbors = dict((key, frozenset(val)) for key, val in neighbors.items())
219        return neighbors
220
221    @cached
222    def distances(self):
223        """The matrix with the all-pairs shortest path lenghts"""
224        from molmod.ext import graphs_floyd_warshall
225        distances = np.zeros((self.num_vertices,)*2, dtype=int)
226        #distances[:] = -1 # set all -1, which is just a very big integer
227        #distances.ravel()[::len(distances)+1] = 0 # set diagonal to zero
228        for i, j in self.edges: # set edges to one
229            distances[i, j] = 1
230            distances[j, i] = 1
231        graphs_floyd_warshall(distances)
232        return distances
233
234    @cached
235    def max_distance(self):
236        """The maximum value in the distances matrix."""
237        if self.distances.shape[0] == 0:
238            return 0
239        else:
240            return self.distances.max()
241
242    @cached
243    def central_vertices(self):
244        """Vertices that have the lowest maximum distance to any other vertex"""
245        max_distances = self.distances.max(0)
246        max_distances_min = max_distances[max_distances > 0].min()
247        return (max_distances == max_distances_min).nonzero()[0]
248
249    @cached
250    def central_vertex(self):
251        """The vertex that has the lowest maximum distance to any other vertex
252
253           This definition does not lead to a unique solution. One arbitrary
254           solution is selected.
255        """
256        return self.central_vertices[0]
257
258    @cached
259    def independent_vertices(self):
260        """Lists of vertices that are only interconnected within each list
261
262           This means that there is no path from a vertex in one list to a
263           vertex in another list. In case of a molecular graph, this would
264           yield the atoms that belong to individual molecules.
265        """
266        candidates = set(range(self.num_vertices))
267
268        result = []
269        while len(candidates) > 0:
270            pivot = candidates.pop()
271            group = [
272                vertex for vertex, distance
273                in self.iter_breadth_first(pivot)
274            ]
275            candidates.difference_update(group)
276
277            # this sort makes sure that the order of the vertices is respected
278            group.sort()
279            result.append(group)
280        return result
281
282    @cached
283    def fingerprint(self):
284        """A total graph fingerprint
285
286           The result is invariant under permutation of the vertex indexes. The
287           chance that two different (molecular) graphs yield the same
288           fingerprint is small but not zero. (See unit tests.)"""
289        if self.num_vertices == 0:
290            return np.zeros(20, np.ubyte)
291        else:
292            return sum(self.vertex_fingerprints)
293
294    @cached
295    def vertex_fingerprints(self):
296        """A fingerprint for each vertex
297
298           The result is invariant under permutation of the vertex indexes.
299           Vertices that are symmetrically equivalent will get the same
300           fingerprint, e.g. the hydrogens in methane would get the same
301           fingerprint.
302        """
303        return self.get_vertex_fingerprints(
304            [self.get_vertex_string(i) for i in range(self.num_vertices)],
305            [self.get_edge_string(i) for i in range(self.num_edges)],
306        )
307
308    @cached
309    def equivalent_vertices(self):
310        """A dictionary with symmetrically equivalent vertices."""
311        level1 = {}
312        for i, row in enumerate(self.vertex_fingerprints):
313            key = row.tobytes()
314            l = level1.get(key)
315            if l is None:
316                l = set([i])
317                level1[key] = l
318            else:
319                l.add(i)
320        level2 = {}
321        for key, vertices in level1.items():
322            for vertex in vertices:
323                level2[vertex] = vertices
324        return level2
325
326    @cached
327    def symmetries(self):
328        """Graph symmetries (permutations) that map the graph onto itself."""
329
330        symmetry_cycles = set([])
331        symmetries = set([])
332        for match in GraphSearch(EqualPattern(self))(self):
333            match.cycles = match.get_closed_cycles()
334            if match.cycles in symmetry_cycles:
335                raise RuntimeError("Duplicates in EqualMatch")
336            symmetry_cycles.add(match.cycles)
337            symmetries.add(match)
338        return symmetries
339
340    @cached
341    def symmetry_cycles(self):
342        """The cycle representations of the graph symmetries"""
343        result = set([])
344        for symmetry in self.symmetries:
345            result.add(symmetry.cycles)
346        return result
347
    @cached
    def canonical_order(self):
        """The vertices in a canonical or normalized order.

           This routine will return a list of vertices in an order that does not
           depend on the initial order, but only depends on the connectivity and
           the return values of the function self.get_vertex_string.

           Only the vertices that are involved in edges will be included. The
           result can be given as first argument to self.get_subgraph, with
           normalize=True as second argument. This will return a complete
           canonical graph.

           The routine is designed not to use symmetry relations that are
           obtained with the GraphSearch routine. We also tried to create an
           ordering that feels natural, i.e. starting in the center and
           pushing vertices with few equivalents to the front. If necessary, the
           nature of the vertices and their bonds to atoms closer to the center
           will also play a role, but only as a last resort.

           Raises NotImplementedError when the ordering is ambiguous (see
           step C below).
        """
        # A) find an appropriate starting vertex.
        # Here we take a central vertex that has a minimal number of symmetrical
        # equivalents, 'the highest atom number', and the highest fingerprint.
        # Note that the symmetrical equivalents are computed from the vertex
        # fingerprints, i.e. without the GraphSearch.
        starting_vertex = max(
            (
                -len(self.equivalent_vertices[vertex]),
                self.get_vertex_string(vertex),
                self.vertex_fingerprints[vertex].tobytes(),
                vertex
            ) for vertex in self.central_vertices
        )[-1]

        # B) sort all vertices based on
        #      1) distance from central vertex
        #      2) number of equivalent vertices
        #      3) vertex string, (higher atom numbers come first)
        #      4) fingerprint
        #      5) vertex index
        # The last field is only included to collect the result of the sort.
        # The fingerprint on itself would be sufficient, but the three first are
        # there to have a naturally appealing result.
        l = [
            [
                -distance,
                -len(self.equivalent_vertices[vertex]),
                self.get_vertex_string(vertex),
                self.vertex_fingerprints[vertex].tobytes(),
                vertex
            ] for vertex, distance in self.iter_breadth_first(starting_vertex)
            if len(self.neighbors[vertex]) > 0
        ]
        l.sort(reverse=True)

        # C) The order of some vertices is still not completely set. e.g.
        # consider the case of allene. The four hydrogen atoms are equivalent,
        # but one can have two different orders: make geminals consecutive or
        # don't. It is more tricky than one would think at first sight. In the
        # case of allene, geminality could easily solve the problem. Consider a
        # big flat rotationally symmetric molecule (order 2). The first five
        # shells are order 4 and one would just give a random order to four
        # segments in the first shell. Only when one reaches the outer part that
        # has order two, it turns out that the arbitrary choices in the inner
        # shell play a role. So it does not help to look at relations with
        # vertices at inner or current shells only. One has to consider the
        # whole picture. (unit testing reveals troubles like these)

        # The loop below only detects such remaining ambiguity: two records
        # that are equal in every field except the vertex index. Resolving the
        # ambiguity deterministically is possible but not implemented, so a
        # NotImplementedError is raised instead of silently returning an
        # order that depends on the input numbering.
        for i in range(1, len(l)):
            if l[i][:-1] == l[i-1][:-1]:
                raise NotImplementedError

        # D) Return only the vertex indexes.
        return [record[-1] for record in l]
427
428    # other usefull graph functions
429
    def iter_breadth_first(self, start=None, do_paths=False, do_duplicates=False):
        """Iterate over the vertices with the breadth first algorithm.

           See http://en.wikipedia.org/wiki/Breadth-first_search for more info.
           If no start vertex is given, the central vertex is taken.

           Each item is a tuple ``(vertex, distance)`` where ``distance`` is
           the number of edges between ``vertex`` and the start vertex. When
           ``do_paths==True``, items are ``(vertex, distance, path)`` tuples
           instead, where ``path`` is the tuple of vertices from the start up
           to and including ``vertex``.

           When ``do_duplicates==True``, vertices that can be reached through
           different paths of equal length will be yielded multiple times.
           This typically only makes sense when ``do_paths==True``.
        """
        if start is None:
            start = self.central_vertex
        else:
            try:
                start = int(start)
            except ValueError:
                raise TypeError("First argument (start) must be an integer.")
            if start < 0 or start >= self.num_vertices:
                raise ValueError("start must be in the range [0, %i[" %
                                 self.num_vertices)
        from collections import deque
        # work[i] == -1 marks vertex i as unvisited; otherwise it holds the
        # distance from the start vertex.
        work = np.zeros(self.num_vertices, int)
        work[:] = -1
        work[start] = 0
        if do_paths:
            result = (start, 0, (start, ))
        else:
            result = (start, 0)
        yield result
        todo = deque([result])
        while len(todo) > 0:
            if do_paths:
                parent, parent_length, parent_path = todo.popleft()
            else:
                parent, parent_length = todo.popleft()
            current_length = parent_length + 1
            for current in self.neighbors[parent]:
                visited = work[current]
                # Visit unseen vertices; when do_duplicates is set, also
                # re-yield vertices already reached at exactly this distance
                # (i.e. reached via an alternative shortest route).
                if visited == -1 or (do_duplicates and visited == current_length):
                    work[current] = current_length
                    if do_paths:
                        current_path = parent_path + (current, )
                        result = (current, current_length, current_path)
                    else:
                        result = (current, current_length)
                    yield result
                    todo.append(result)
482
483    def iter_shortest_paths(self, a, b):
484        """Iterate over all the shortest paths between vertex a and b."""
485        max_len = None
486        ibf = self.iter_breadth_first(a, do_paths=True, do_duplicates=True)
487        for vertex, length, path in ibf:
488            if max_len is not None:
489                if length > max_len:
490                    return
491            if vertex == b:
492                max_len = length
493                yield path
494
    def iter_breadth_first_edges(self, start=None):
        """Iterate over the edges with the breadth first convention.

           We need this for the pattern matching algorithms, but a quick look at
           Wikipedia did not result in a known and named algorithm.

           The edges are yielded one by one, together with the distance of the
           edge from the starting vertex and a flag that indicates whether the
           yielded edge connects two vertices that are at the same distance from
           the starting vertex. If that flag is False, the distance from the
           starting vertex to edge[0] is equal to the distance variable and the
           distance from edge[1] to the starting vertex is equal to distance+1.
           One item has the following format: ((i, j), distance, flag)

           If no start vertex is given, the central vertex is taken.
        """
        if start is None:
            start = self.central_vertex
        else:
            try:
                start = int(start)
            except ValueError:
                raise TypeError("First argument (start) must be an integer.")
            if start < 0 or start >= self.num_vertices:
                raise ValueError("start must be in the range [0, %i[" %
                                 self.num_vertices)
        from collections import deque
        # work[i] == -1 marks vertex i as unvisited; otherwise it holds the
        # distance from the start vertex.
        work = np.zeros(self.num_vertices, int)
        work[:] = -1
        work[start] = 0
        todo = deque([start])
        while len(todo) > 0:
            parent = todo.popleft()
            distance = work[parent]
            for current in self.neighbors[parent]:
                if work[current] == -1:
                    # current is discovered for the first time: a tree edge
                    # leading one shell outward.
                    yield (parent, current), distance, False
                    work[current] = distance+1
                    todo.append(current)
                elif work[current] == distance and current > parent:
                    # Edge between two vertices in the same shell. The second
                    # condition in this elif avoids duplicates: such an edge is
                    # encountered once from each end.
                    yield (parent, current), distance, True
                elif work[current] == distance+1:
                    # Edge to an already-discovered vertex one shell further
                    # out (a non-tree edge).
                    yield (parent, current), distance, False
537
    def get_subgraph(self, subvertices, normalize=False):
        """Constructs a subgraph of the current graph

           Arguments:
            | ``subvertices`` -- The vertices that should be retained.
            | ``normalize`` -- Whether or not the vertices should be renumbered
                 and reduced to the given set of subvertices. When True, also
                 the edges are sorted. In the end, this means that the new
                 order of the edges does not depend on the original order, but
                 only on the order of the argument subvertices.
                 This option is False by default. When False, only edges will be
                 discarded, but the retained data remain unchanged. Also the
                 parameter num_vertices is not affected.

           The returned graph will have an attribute ``_old_edge_indexes`` that
           relates the positions of the new and the old edges as follows::

             >>> self.edges[result._old_edge_indexes[i]] = result.edges[i]

           In derived classes, the following should be supported::

             >>> self.edge_property[result._old_edge_indexes[i]] = result.edge_property[i]

           When ``normalize==True``, also the vertices are affected and the
           derived classes should make sure that the following works::

             >>> self.vertex_property[result._old_vertex_indexes[i]] = result.vertex_property[i]

           The attribute ``_old_vertex_indexes`` is only constructed when
           ``normalize==True``.
        """
        if normalize:
            # Map each retained old vertex index onto its position in the
            # subvertices argument; vertices absent from the map are dropped.
            revorder = dict((j, i) for i, j in enumerate(subvertices))
            new_edges = []
            old_edge_indexes = []
            for counter, (i, j) in enumerate(self.edges):
                new_i = revorder.get(i)
                if new_i is None:
                    continue
                new_j = revorder.get(j)
                if new_j is None:
                    continue
                new_edges.append((new_i, new_j))
                old_edge_indexes.append(counter)
            # sort the edges so their order only depends on subvertices
            order = list(range(len(new_edges)))
            # argsort in pure python
            order.sort( key=(lambda i: tuple(sorted(new_edges[i]))) )
            new_edges = [new_edges[i] for i in order]
            old_edge_indexes = [old_edge_indexes[i] for i in order]

            result = Graph(new_edges, num_vertices=len(subvertices))
            result._old_vertex_indexes = np.array(subvertices, dtype=int)
            result._old_edge_indexes = np.array(old_edge_indexes, dtype=int)
        else:
            # Keep only the edges whose two ends are both retained; the vertex
            # numbering and num_vertices remain unchanged.
            subvertices = set(subvertices)
            old_edge_indexes = np.array([
                i for i, edge in enumerate(self.edges)
                if edge.issubset(subvertices)
            ], dtype=int)
            new_edges = tuple(self.edges[i] for i in old_edge_indexes)
            result = Graph(new_edges, self.num_vertices)
            result._old_edge_indexes = old_edge_indexes
            # no need for old and new vertex_indexes because they remain the
            # same.
        return result
605
    def get_vertex_fingerprints(self, vertex_strings, edge_strings, num_iter=None):
        """Return an array with fingerprints for each vertex

           Arguments:
            | ``vertex_strings`` -- a string per vertex characterizing its
                 nature
            | ``edge_strings`` -- a string per edge (in the order of
                 self.edges) characterizing its nature
            | ``num_iter`` -- the number of neighbor-mixing iterations;
                 defaults to self.max_distance so information can propagate
                 across the whole graph

           The result is a (num_vertices, 20) array of unsigned bytes: each
           row is derived from SHA-1 digests (20 bytes). Note that all array
           additions below wrap modulo 256 due to the np.ubyte dtype.
        """
        import hashlib
        def str2array(x):
            """convert a hash string to a numpy array of bytes"""
            if len(x) == 0:
                return np.zeros(0, np.ubyte)
            elif sys.version_info[0] == 2:
                # Python 2: a str is already a byte string.
                return np.frombuffer(x, np.ubyte)
            else:
                # Python 3: encode the text before taking its bytes.
                return np.frombuffer(x.encode(), np.ubyte)
        # hashrow maps an arbitrary byte array onto its 20-byte SHA-1 digest.
        hashrow = lambda x: np.frombuffer(hashlib.sha1(x.data).digest(), np.ubyte)
        # initialization: hash each vertex string, then mix in the hashes of
        # the strings of the edges incident to each vertex.
        result = np.zeros((self.num_vertices, 20), np.ubyte)
        for i in range(self.num_vertices):
            result[i] = hashrow(str2array(vertex_strings[i]))
        for i in range(self.num_edges):
            a, b = self.edges[i]
            tmp = hashrow(str2array(edge_strings[i]))
            result[a] += tmp
            result[b] += tmp
        work = result.copy()
        # iterations: repeatedly add the neighbors' fingerprints to each vertex
        # and re-hash, so that ever larger environments are encoded.
        if num_iter is None:
            num_iter = self.max_distance
        for i in range(num_iter):
            for a, b in self.edges:
                work[a] += result[b]
                work[b] += result[a]
            for a in range(self.num_vertices):
                result[a] = hashrow(work[a])
        return result
641
642    def get_halfs(self, vertex1, vertex2):
643        """Split the graph in two halfs by cutting the edge: vertex1-vertex2
644
645           If this is not possible (due to loops connecting both ends), a
646           GraphError is raised.
647
648           Returns the vertices in both halfs.
649        """
650        def grow(origin, other):
651            frontier = set(self.neighbors[origin])
652            frontier.discard(other)
653            result = set([origin])
654            while len(frontier) > 0:
655                pivot = frontier.pop()
656                if pivot == other:
657                    raise GraphError("The graph can not be separated in two halfs "
658                                     "by disconnecting vertex1 and vertex2.")
659                pivot_neighbors = set(self.neighbors[pivot])
660                pivot_neighbors -= result
661                frontier |= pivot_neighbors
662                result.add(pivot)
663            return result
664
665        vertex1_part = grow(vertex1, vertex2)
666        vertex2_part = grow(vertex2, vertex1)
667        return vertex1_part, vertex2_part
668
669    def get_part(self, vertex_in, vertices_border):
670        """List all vertices that are connected to vertex_in, but are not
671           included in or 'behind' vertices_border.
672        """
673        vertices_new = set(self.neighbors[vertex_in])
674        vertices_part = set([vertex_in])
675
676        while len(vertices_new) > 0:
677            pivot = vertices_new.pop()
678            if pivot in vertices_border:
679                continue
680            vertices_part.add(pivot)
681            pivot_neighbors = set(self.neighbors[pivot])
682            pivot_neighbors -= vertices_part
683            vertices_new |= pivot_neighbors
684
685        return vertices_part
686
    def get_halfs_double(self, vertex_a1, vertex_b1, vertex_a2, vertex_b2):
        """Compute the two parts separated by ``(vertex_a1, vertex_b1)`` and ``(vertex_a2, vertex_b2)``

           Raise a GraphError when ``(vertex_a1, vertex_b1)`` and
           ``(vertex_a2, vertex_b2)`` do not separate the graph in two
           disconnected parts. The edges must be neighbors. If not a GraphError
           is raised. The four vertices must not coincide or a GraphError is
           raised.

           Returns the vertices of the two halfs and the four 'hinge' vertices
           in the correct order, i.e. both ``vertex_a1`` and ``vertex_a2`` are
           in the first half and both ``vertex_b1`` and ``vertex_b2`` are in the
           second half.
        """
        # Both cutting pairs must be actual edges of the graph.
        if vertex_a1 not in self.neighbors[vertex_b1]:
            raise GraphError("vertex_a1 must be a neighbor of vertex_b1.")
        if vertex_a2 not in self.neighbors[vertex_b2]:
            raise GraphError("vertex_a2 must be a neighbor of vertex_b2.")

        # find vertex_a_part (and possibly switch vertex_a2, vertex_b2)
        # Flood fill starting from vertex_a1; vertex_b1 acts as a barrier.
        vertex_a_new = set(self.neighbors[vertex_a1])
        vertex_a_new.discard(vertex_b1)
        if vertex_a1 == vertex_b2:
            # we know that we have to swap vertex_a2 and vertex_b2. The algo
            # below will fail otherwise in this 'exotic' case.
            vertex_a2, vertex_b2 = vertex_b2, vertex_a2
            #vertex_a_new.discard(vertex_a2) # in case there is overlap
        if vertex_a1 == vertex_a2:
            vertex_a_new.discard(vertex_b2) # in case there is overlap
        vertex_a_part = set([vertex_a1])

        touched = False # True if (the switched) vertex_a2 has been reached.
        while len(vertex_a_new) > 0:
            pivot = vertex_a_new.pop()
            if pivot == vertex_b1:
                # Crossing the first cutting edge means the two edges do not
                # disconnect the graph.
                raise GraphError("The graph can not be separated in two halfs. "
                                 "vertex_b1 reached by vertex_a1.")
            vertex_a_part.add(pivot)
            # create a new set that we can modify
            pivot_neighbors = set(self.neighbors[pivot])
            pivot_neighbors -= vertex_a_part
            if pivot == vertex_a2 or pivot == vertex_b2:
                if pivot == vertex_b2:
                    if touched:
                        # The second cutting edge was already crossed once:
                        # the halves are connected through it.
                        raise GraphError("The graph can not be separated in "
                                         "two halfs. vertex_b2 reached by "
                                         "vertex_a1.")
                    else:
                        # put them in the correct order
                        vertex_a2, vertex_b2 = vertex_b2, vertex_a2
                # Do not cross the second cutting edge during the fill.
                pivot_neighbors.discard(vertex_b2)
                touched = True
            vertex_a_new |= pivot_neighbors

        if vertex_a2 not in vertex_a_part:
            raise GraphError("The graph can not be separated in two halfs. "
                             "vertex_a1 can not reach vertex_a2 trough "
                             "vertex_a_part")

        # find vertex_b_part: easy, is just the rest ...
        #vertex_b_part = set(xrange(self.num_vertices)) - vertex_a_part

        # ... but we also want that there is a path in vertex_b_part from
        # vertex_b1 to vertex_b2
        if vertex_b1 == vertex_b2:
            closed = True
        else:
            # Second flood fill from vertex_b1, barred from entering
            # vertex_a_part through vertex_a1; stops as soon as vertex_b2
            # is found.
            vertex_b_new = set(self.neighbors[vertex_b1])
            vertex_b_new.discard(vertex_a1)
            vertex_b_part = set([vertex_b1])

            closed = False
            while len(vertex_b_new) > 0:
                pivot = vertex_b_new.pop()
                if pivot == vertex_b2:
                    closed = True
                    break
                pivot_neighbors = set(self.neighbors[pivot])
                pivot_neighbors -= vertex_b_part
                vertex_b_new |= pivot_neighbors
                vertex_b_part.add(pivot)

        if not closed:
            raise GraphError("The graph can not be separated in two halfs. "
                             "vertex_b1 can not reach vertex_b2 trough "
                             "vertex_b_part")

        # finally compute the real vertex_b_part, the former loop might break
        # early for efficiency.
        vertex_b_part = set(range(self.num_vertices)) - vertex_a_part

        # done!
        return vertex_a_part, vertex_b_part, \
               (vertex_a1, vertex_b1, vertex_a2, vertex_b2)
781
    def full_match(self, other):
        """Find the mapping between vertex indexes in self and other.

           This also works on disconnected graphs. Derived classes should just
           implement get_vertex_string and get_edge_string to make this method
           aware of the different nature of certain vertices. In case molecules,
           this would make the algorithm sensitive to atom numbers etc.

           Returns a OneToOne mapping from vertex indexes of self onto vertex
           indexes of other, or None when no full match exists.
        """
        # we need normalize subgraphs because these graphs are used as patterns.
        graphs0 = [
            self.get_subgraph(group, normalize=True)
            for group in self.independent_vertices
        ]
        graphs1 = [
            other.get_subgraph(group)
            for group in other.independent_vertices
        ]

        # A different number of connected components can never match.
        if len(graphs0) != len(graphs1):
            return

        matches = []

        for graph0 in graphs0:
            # Look for a component of other that matches this component.
            pattern = EqualPattern(graph0)
            found_match = False
            for i, graph1 in enumerate(graphs1):
                local_matches = list(GraphSearch(pattern)(graph1, one_match=True))
                if len(local_matches) == 1:
                    match = local_matches[0]
                    # we need to restore the relation between the normalized
                    # graph0 and its original indexes
                    old_to_new = OneToOne((
                        (j, i) for i, j
                        in enumerate(graph0._old_vertex_indexes)
                    ))
                    matches.append(match * old_to_new)
                    # Each component of other may be consumed at most once.
                    # Deleting while enumerating is safe because we break
                    # out of the loop immediately afterwards.
                    del graphs1[i]
                    found_match = True
                    break
            if not found_match:
                return

        # Merge the component-wise matches into a single bijection.
        result = OneToOne()
        for match in matches:
            result.add_relations(match.forward.items())
        return result
829
830
831# Pattern matching
832
833
class OneToOne(object):
    """A discrete bijection between source and destination elements.

       The bijection is stored redundantly as two dictionaries, one with the
       forward relations and one with the reverse relations, which are kept
       mutually consistent at all times.
    """

    def __init__(self, relations=None):
        """
           Argument:
            | ``relations``  --  initial relations for the bijection
        """
        self.forward = {}
        self.reverse = {}
        if relations is not None:
            self.add_relations(relations)

    def __len__(self):
        # number of relations in the bijection
        return len(self.forward)

    def __str__(self):
        pieces = ["|"]
        for src, dst in self.forward.items():
            pieces.append(" %s -> %s |" % (src, dst))
        return "".join(pieces)

    def __mul__(self, other):
        """Return the result of the 'after' operator."""
        # (self * other) first applies other, then self.
        composed = OneToOne()
        for src, mid in other.forward.items():
            dst = self.forward[mid]
            composed.forward[src] = dst
            composed.reverse[dst] = src
        return composed

    def add_relation(self, source, destination):
        """Add a new relation to the bijection"""
        if self.in_sources(source):
            if self.forward[source] == destination:
                raise ValueError("Source-Destination relation already exists.")
            raise ValueError("Source is already in use. Destination does "
                             "not match.")
        if self.in_destinations(destination):
            raise ValueError("Destination is already in use. Source does not "
                             "match.")
        self.forward[source] = destination
        self.reverse[destination] = source

    def add_relations(self, relations):
        """Add multiple relations to a bijection"""
        for src, dst in relations:
            self.add_relation(src, dst)

    def get_destination(self, source):
        """Get the end point of the relation that starts at 'source'"""
        return self.forward[source]

    def get_source(self, destination):
        """Get the starting point of the relation that ends at 'destination'"""
        return self.reverse[destination]

    def in_destinations(self, destination):
        """Test if the given destination is present"""
        return destination in self.reverse

    def in_sources(self, source):
        """Test if the given source is present"""
        return source in self.forward

    def inverse(self):
        """Returns the inverse bijection."""
        flipped = self.__class__()
        flipped.forward = copy.copy(self.reverse)
        flipped.reverse = copy.copy(self.forward)
        return flipped
911
912
class Match(OneToOne):
    """A match between a pattern and a graph"""

    @classmethod
    def from_first_relation(cls, vertex0, vertex1):
        """Create a fresh match seeded with a single relation"""
        seeded = cls([(vertex0, vertex1)])
        # previous_ends1 holds the subject vertices added in the last step.
        seeded.previous_ends1 = set([vertex1])
        return seeded

    def get_new_edges(self, subject_graph):
        """Get new edges from the subject graph for the graph search algorithm

           The Graph search algorithm extends the matches iteratively by adding
           matching vertices that are one edge further from the starting vertex
           at each iteration.
        """
        # Collect edges from the frontier vertices to subject vertices that
        # are not part of the match yet.
        return [
            (vertex, neighbor)
            for vertex in self.previous_ends1
            for neighbor in subject_graph.neighbors[vertex]
            if neighbor not in self.reverse
        ]

    def copy_with_new_relations(self, new_relations):
        """Create a new match object extended with new relations"""
        extended = self.__class__(self.forward.items())
        extended.add_relations(new_relations.items())
        extended.previous_ends1 = set(new_relations.values())
        return extended
943
944
class Pattern(object):
    """Base class for a pattern in a graph.

       Note the following conventions:

       * A pattern can always be represented by a graph (or a set of graphs)
         and some additional conditions. This graph is the so called 'PATTERN
         GRAPH'. For technical reasons, this pattern graph is not always
         constructed explicitly. Variables related to this graph often get
         suffix '0'. Note that a pattern graph is always fully connected.
       * The graph in which we search for the pattern, is called the 'SUBJECT
         GRAPH'. Variables related to this graph often get suffix '1'.
    """

    # If True, matching vertices are not required to have an equal number of
    # neighbors (i.e. the pattern may match part of the subject graph).
    sub = True
    # The Match subclass used to represent (partial) matches of this pattern.
    MatchClass = Match

    def iter_initial_relations(self, subject_graph):
        """Iterate over all initial relations to start a Graph Search

           The function iterates over single relations between a pattern vertex
           and a subject vertex. In practice it is sufficient to select one
           vertex in the pattern and then associate it with each (reasonable)
           vertex in the subject graph.
        """
        raise NotImplementedError

    def get_new_edges(self, level):
        """Get new edges from the pattern graph for the graph search algorithm

           The level argument denotes the distance of the new edges from the
           starting vertex in the pattern graph.
        """
        raise NotImplementedError

    def check_symmetry(self, new_relations, current_match, next_match):
        """Of all symmetric ``new_relations``, only allow one"""
        # Base implementation imposes no symmetry restrictions.
        return True

    def compare(self, vertex0, vertex1, subject_graph):
        """Test if ``vertex0`` and ``vertex1`` can be equal

           False positives are allowed, but the less false positives, the more
           efficient the :class:`GraphSearch` will be.
        """
        # Base implementation accepts every pair of vertices.
        return True

    def check_next_match(self, match, new_relations, subject_graph, one_match):
        """Does this match object make sense for the current pattern

           Return False if some symmetry or other considerations are not
           satisfied. The checks in this function are typically only possible by
           considering the whole instead of looking just at a few
           vertices/edges/relations.
        """
        # Base implementation rejects nothing.
        return True

    def complete(self, match, subject_graph):
        """Returns ``True`` if no more additional relations are required"""
        return True

    def iter_final_matches(self, match, subject_graph, one_match):
        """Just return the original match

           Derived classes can specialize here to make efficient use of
           symmetries
        """
        yield match
1014
1015
class CriteriaSet(object):
    """A set of criteria that can be associated with a custom pattern."""

    def __init__(self, vertex_criteria=None, edge_criteria=None, **kwargs):
        """
           Arguments:
            | ``vertex_criteria``  --  a dictionary with criteria for the
                                       vertices, ``key=vertex_index``,
                                       ``value=criterion``
            | ``edge_criteria``  --  a dictionary with criteria for the edges
                                     ``key=edge_index``, ``value=criterion``

           Any other keyword argument will be assigned as attribute to matches
           that fulfill the criteria of this set.
        """
        self.vertex_criteria = {} if vertex_criteria is None else vertex_criteria
        self.edge_criteria = {} if edge_criteria is None else edge_criteria
        self.info = kwargs

    def test_match(self, match, pattern_graph, subject_graph):
        """Test if a match satisfies all criteria in this set"""
        # Every vertex criterion is evaluated on the matched subject vertex.
        for vertex0, criterion in self.vertex_criteria.items():
            if not criterion(match.forward[vertex0], subject_graph):
                return False
        # Every edge criterion is evaluated on the corresponding subject edge,
        # looked up through the matched end points of the pattern edge.
        for edge0_index, criterion in self.edge_criteria.items():
            vertex0a, vertex0b = pattern_graph.edges[edge0_index]
            key = frozenset([match.forward[vertex0a], match.forward[vertex0b]])
            if not criterion(subject_graph.edge_index[key], subject_graph):
                return False
        return True
1056
# A few basic example criteria
1058
class Anything(object):
    """A criterion that accepts any vertex or edge"""

    def __call__(self, index, subject_graph):
        """Accept unconditionally; always returns True"""
        return True
1064
1065
class CritOr(object):
    """OR Operator for criteria objects"""

    def __init__(self, *criteria):
        """
           Argument:
            | ``criteria``  --  a list of criteria to apply the OR operation to.
        """
        self.criteria = criteria

    def __call__(self, index, graph):
        """Return True when at least one criterion accepts the vertex/edge

           Arguments:
            | ``index``  --  the index of the vertex/edge on which the criterion
                             is applied
            | ``graph``  --  the graph on which the criterion is tested
        """
        # any() short-circuits exactly like the original explicit loop.
        return any(criterion(index, graph) for criterion in self.criteria)
1088
1089
class CritAnd(object):
    """AND Operator for criteria objects"""

    def __init__(self, *criteria):
        """
           Argument:
            | ``criteria``  --  a list of criteria to apply the AND operation to
        """
        self.criteria = criteria

    def __call__(self, index, graph):
        """Return True only when every criterion accepts the vertex/edge

           Arguments:
            | ``index``  --  the index of the vertex/edge on which the criterion
                             is applied
            | ``graph``  --  the graph on which the criterion is tested
        """
        # all() short-circuits exactly like the original explicit loop.
        return all(criterion(index, graph) for criterion in self.criteria)
1112
1113
class CritXor(object):
    """XOR Operator for criteria objects"""

    def __init__(self, *criteria):
        """
           Argument:
            | ``criteria``  --  a list of criteria to apply the XOR operation to.
        """
        self.criteria = criteria

    def __call__(self, index, graph):
        """Apply a generalized XOR operation over all criteria

           Arguments:
            | ``index``  --  the index of the vertex/edge on which the criterion
                             is applied
            | ``graph``  --  the graph on which the criterion is tested

           When the XOR operation is applied to more than two criteria, True
           is only returned when an odd number of criteria return True.
        """
        accepted = sum(1 for criterion in self.criteria
                       if criterion(index, graph))
        return accepted % 2 == 1
1140
1141
class CritNot(object):
    """Inversion of another criterion"""

    def __init__(self, criterion):
        """
           Argument:
            | ``criterion``  --  another criterion object
        """
        self.criterion = criterion

    def __call__(self, index, graph):
        """Return the logical negation of the wrapped criterion

           Arguments:
            | ``index``  --  the index of the vertex/edge on which the criterion
                             is applied
            | ``graph``  --  the graph on which the criterion is tested
        """
        return not self.criterion(index, graph)
1161
1162
1163# pattern and match stuff
1164
1165
class CustomPattern(Pattern):
    """A pattern based on a given pattern graph

       The pattern graph can be complemented with additional criteria for the
       vertices and edges. Additionally the effective symmetry of the pattern
       graph can be reduced by tagging the vertices in the pattern graph with
       different labels.
    """
    def __init__(self, pattern_graph, criteria_sets=None, vertex_tags=None, start_vertex=None):
        """
        Arguments:
          | ``pattern_graph`` -- the pattern that has to be found in the subject
                                 graph.
          | ``criteria_sets`` -- Criteria sets associate additional conditions
                                 with vertices and edges, and can also introduce
                                 global match conditions.
          | ``vertex_tags`` -- vertex tags can reduce the symmetry of the
              pattern_graph pattern. An example case where this is useful:
              Consider atoms 0, 1, 2 that are bonded in this order. We want to
              compute the distance from atom 2 to the line (0, 1). In this case
              the matches (0->a, 1->b, 2->c) and (0->c, 1->b, 2->a) correspond
              to different internal coordinates. We want the graph search to
              return the two solutions. In order to do this, set
              ``vertex_tags={0:0, 1:0, 2:1}``. This means that vertex 0 and 1
              are equivalent, but that vertex 2 has a different nature. In the
              case of a bending angle, only one match like (0->a, 1->b, 2->c) is
              sufficient and we do not want to reduce the symmetry of the
              ``pattern_graph``. In this case, one should not use vertex_tags at
              all.
          | ``start_vertex``  --  The first vertex in the pattern graph that is
              linked with a vertex in the subject graph. A wise choice can
              improve the performance of a graph search. If not given, the
              central vertex is taken as start_vertex
        """
        self.criteria_sets = criteria_sets
        if vertex_tags is None:
            vertex_tags = {}
        self.vertex_tags = vertex_tags
        # get the essential information from the pattern_graph:
        if start_vertex is None:
            self.start_vertex = pattern_graph.central_vertex
        else:
            self.start_vertex = start_vertex
        self._set_pattern_graph(pattern_graph)
        Pattern.__init__(self)

    def _set_pattern_graph(self, pattern_graph):
        """Initialize the pattern_graph

           Precomputes the edges of the pattern graph per breadth-first level
           (level_edges), the constraint edges per level (level_constraints),
           and the vertex pairs used to filter symmetric duplicate matches
           (duplicate_checks).
        """
        self.pattern_graph = pattern_graph
        self.level_edges = {}
        self.level_constraints = {}
        self.duplicate_checks = set([])
        if pattern_graph is None:
            return
        # A pattern graph must consist of a single connected component.
        if len(pattern_graph.independent_vertices) != 1:
            raise ValueError("A pattern_graph must not be a disconnected "
                             "graph.")
        # A) the levels for the incremental pattern matching
        ibfe = self.pattern_graph.iter_breadth_first_edges(self.start_vertex)
        for edge, distance, constraint in ibfe:
            if constraint:
                # constraint edges close a cycle; they are checked one level
                # earlier than regular extension edges
                l = self.level_constraints.setdefault(distance-1, [])
            else:
                l = self.level_edges.setdefault(distance, [])
            l.append(edge)
        #print "level_edges", self.level_edges
        #print "level_constraints", self.level_constraints
        # B) The comparisons the should be checked when one wants to avoid
        # symmetrically duplicate pattern matches
        if self.criteria_sets is not None:
            # take the first two vertices of each normalized symmetry cycle
            for cycles in pattern_graph.symmetry_cycles:
                if len(cycles) > 0:
                    self.duplicate_checks.add((cycles[0][0], cycles[0][1]))


    def iter_initial_relations(self, subject_graph):
        """Iterate over all valid initial relations for a match"""
        vertex0 = self.start_vertex
        # pair the start vertex of the pattern with every compatible subject
        # vertex
        for vertex1 in range(subject_graph.num_vertices):
            if self.compare(vertex0, vertex1, subject_graph):
                yield vertex0, vertex1

    def get_new_edges(self, level):
        """Get new edges from the pattern graph for the graph search algorithm

           The level argument denotes the distance of the new edges from the
           starting vertex in the pattern graph. Returns a tuple of
           (extension edges, constraint edges) for that level.
        """
        return (
            self.level_edges.get(level, []),
            self.level_constraints.get(level, [])
        )

    def check_next_match(self, match, new_relations, subject_graph, one_match):
        """Check if the (onset for a) match can be a valid"""
        # only returns true for exactly one set of new_relations from all the
        # ones that are symmetrically equivalent
        if not (self.criteria_sets is None or one_match):
            for check in self.duplicate_checks:
                vertex_a = new_relations.get(check[0])
                vertex_b = new_relations.get(check[1])
                if vertex_a is None and vertex_b is None:
                    continue # if this pair is completely absent in the new
                    # relations, it is either completely in the match or it
                    # is to be matched. So it is either already checked for
                    # symmetry duplicates, or it will be check in future.
                if vertex_a is None:
                    # maybe vertex_a is in the match and vertex_b is the only
                    # one in the new relations. try to get vertex_a from the
                    # match.
                    vertex_a = match.forward.get(check[0])
                    if vertex_a is None:
                        # ok, vertex_a is to be found, don't care about it right
                        # now. it will be checked in future calls.
                        continue
                elif vertex_b is None:
                    # maybe vertex_b is in the match and vertex_a is the only
                    # one in the new relations. try to get vertex_b from the
                    # match.
                    vertex_b = match.forward.get(check[1])
                    if vertex_b is None:
                        # ok, vertex_b is to be found, don't care about it right
                        # now. it will be checked in future calls.
                        continue
                if vertex_a > vertex_b:
                    # Why does this work? The answer is not so easy to explain,
                    # and certainly not easy to find. if vertex_a > vertex_b, it
                    # means that there is a symmetry operation that leads to
                    # an equivalent match where vertex_b < vertex_a. The latter
                    # match is preferred for as much pairs (vertex_a, vertex_b)
                    # as possible without rejecting all possible matches. The
                    # real difficulty is to construct a proper list of
                    # (vertex_a, vertex_b) pairs that will reject all but one
                    # matches. I conjecture that this list contains all the
                    # first two vertices from each normalized symmetry cycle of
                    # the pattern graph. We need a math guy to do the proof. -- Toon
                    return False
            return True
        return True

    def complete(self, match, subject_graph):
        """Return True of the match is complete"""
        # complete when every pattern vertex has been matched
        return len(match) == self.pattern_graph.num_vertices

    def iter_final_matches(self, canonical_match, subject_graph, one_match):
        """Given a match, iterate over all related equivalent matches

           When criteria sets are defined, the iterator runs over all symmetric
           equivalent matches that fulfill one of the criteria sets. When not
           criteria sets are defined, the iterator only yields the input match.
        """
        if self.criteria_sets is None or one_match:
            yield canonical_match
        else:
            for criteria_set in self.criteria_sets:
                satisfied_match_tags = set([])
                for symmetry in self.pattern_graph.symmetries:
                    final_match = canonical_match * symmetry
                    #print final_match
                    if criteria_set.test_match(final_match, self.pattern_graph, subject_graph):
                        # the tag tuple identifies symmetry-equivalent matches;
                        # yield only one match per distinct tag tuple
                        match_tags = tuple(
                            self.vertex_tags.get(symmetry.reverse[vertex0])
                            for vertex0
                            in range(self.pattern_graph.num_vertices)
                        )
                        if match_tags not in satisfied_match_tags:
                            final_match.__dict__.update(criteria_set.info)
                            yield final_match
                            satisfied_match_tags.add(match_tags)
1335
1336
class EqualMatch(Match):
    """A Match object with specialized functions for the EqualPattern"""

    def get_closed_cycles(self):
        """Return the closed cycles corresponding to this permutation

           The cycle will be normalized to facilitate the elimination of
           duplicates. The following is guaranteed:

           1) If this permutation is represented by disconnected cycles, the
              cycles will be sorted by the lowest index they contain.
           2) Each cycle starts with its lowest index. (unique starting point)
           3) Singletons are discarded. (because they are boring)

           Raises a GraphError when the sources and destinations of this match
           are not the same set, i.e. when it is not a permutation.
        """
        # A) construct all the cycles
        closed_cycles = []
        todo = set(self.forward.keys())
        if todo != set(self.forward.values()):
            raise GraphError("The subject and pattern graph must have the same "
                             "numbering.")
        current_vertex = None
        while len(todo) > 0:
            # PEP 8: compare to None with 'is', not '=='
            if current_vertex is None:
                # start a new cycle at an arbitrary remaining vertex
                current_vertex = todo.pop()
                current_cycle = []
            else:
                todo.discard(current_vertex)
            current_cycle.append(current_vertex)
            next_vertex = self.get_destination(current_vertex)
            if next_vertex == current_cycle[0]:
                # the cycle is closed
                if len(current_cycle) > 1:
                    # bring the lowest element in front
                    pivot = np.argmin(current_cycle)
                    current_cycle = current_cycle[pivot:] + \
                                    current_cycle[:pivot]
                    closed_cycles.append(current_cycle)
                current_vertex = None
            else:
                current_vertex = next_vertex
        # B) normalize the cycle representation
        closed_cycles.sort() # a normal sort is sufficient because only the
                             # first item of each cycle is considered

        # transform the structure into a tuple of tuples
        closed_cycles = tuple(tuple(cycle) for cycle in closed_cycles)
        return closed_cycles
1382
1383
class EqualPattern(CustomPattern):
    """Like CustomPattern, but the pattern has the same size as the subject graph"""
    sub = False
    MatchClass = EqualMatch

    def __init__(self, pattern_graph):
        """See :meth:`CustomPattern.__init__`"""
        # Criteria sets and vertex_tags are deliberately not supported here.
        # This limitation is due to the compare method below.
        # TODO: Is this a good idea?
        CustomPattern.__init__(self, pattern_graph)

    def iter_initial_relations(self, subject_graph):
        """Iterate over all valid initial relations for a match"""
        # Equal graphs must have the same number of edges; otherwise there is
        # no point in trying at all.
        if self.pattern_graph.num_edges == subject_graph.num_edges:
            for pair in CustomPattern.iter_initial_relations(self, subject_graph):
                yield pair

    def compare(self, vertex0, vertex1, subject_graph):
        """Returns true when the two vertices are of the same kind"""
        fingerprint0 = self.pattern_graph.vertex_fingerprints[vertex0]
        fingerprint1 = subject_graph.vertex_fingerprints[vertex1]
        return (fingerprint0 == fingerprint1).all()
1408
1409
class RingPattern(Pattern):
    """A pattern that matches strong rings up to a given size"""

    def __init__(self, max_size):
        """
           Argument:
            | ``max_size``  --  the maximum number of vertices in a ring

           Raises ValueError when max_size is below 3, the smallest
           possible ring size.
        """
        if max_size < 3:
            raise ValueError("Ring sizes must be at least 3.")
        self.max_size = max_size
        Pattern.__init__(self)

    def iter_initial_relations(self, subject_graph):
        """Iterate over all valid initial relations for a match"""
        # The ring is always traversed starting from pattern vertex 0, so
        # every subject vertex is tried as a potential ring starting point.
        vertex0 = 0
        for vertex1 in range(subject_graph.num_vertices):
            yield vertex0, vertex1

    def get_new_edges(self, level):
        """Get new edges from the pattern graph for the graph search algorithm

           The level argument denotes the distance of the new edges from the
           starting vertex in the pattern graph.
        """
        # The candidate ring is grown in two directions at once, away from
        # pattern vertex 0: odd-numbered pattern vertices form one arm of the
        # ring, even-numbered vertices the other arm.
        if level == 0:
            edges0 = [(0, 1), (0, 2)]
        elif level >= (self.max_size-1)//2:
            # Growing further would exceed max_size vertices; stop here.
            edges0 = []
        else:
            l2 = level*2
            edges0 = [(l2-1, l2+1), (l2, l2+2)]
        # No extra connectivity constraints between the new vertices.
        return edges0, []

    def check_next_match(self, match, new_relations, subject_graph, one_match):
        """Check if the (onset for a) match can be a valid (part of a) ring"""
        # avoid duplicate rings (order of traversal)
        if len(match) == 3:
            if match.forward[1] < match.forward[2]:
                #print "RingPattern.check_next_match: duplicate order", match.forward[1], match.forward[2]
                return False
        # avoid duplicate rings (starting point)
        for vertex1 in new_relations.values():
            if vertex1 < match.forward[0]:
                #print "RingPattern.check_next_match: duplicate start", vertex1, match.forward[0]
                return False
        # can this ever become a strong ring?
        # Each newly added vertex must have exactly one shortest path back to
        # the ring start, and that path must have the length expected from
        # the current size of the match.
        for vertex1 in new_relations.values():
            paths = list(subject_graph.iter_shortest_paths(vertex1, match.forward[0]))
            if len(paths) != 1:
                #print "RingPattern.check_next_match: not strong 1"
                return False
            if len(paths[0]) != (len(match)+1)//2:
                #print "RingPattern.check_next_match: not strong 2"
                return False
        return True

    def complete(self, match, subject_graph):
        """Check the completeness of a ring match

           Returns True when the current match forms a closed strong ring.
           For an even ring, one extra vertex (closing the two arms) is added
           to the match before returning; it is rolled back when the ring
           turns out not to be strong.
        """
        size = len(match)
        # check whether we have an odd strong ring
        if match.forward[size-1] in subject_graph.neighbors[match.forward[size-2]]:
            # we have an odd closed cycle. check if this is a strong ring
            # 'order' lists the pattern vertices in ring-traversal order:
            # even-numbered arm first, then the odd-numbered arm reversed.
            order = list(range(0, size, 2)) + list(range(1, size-1, 2))[::-1]
            ok = True
            for i in range(len(order)//2):
                # Count the number of paths between two opposite points in the
                # ring. Since the ring has an odd number of vertices, each
                # vertex has two semi-opposite vertices.
                count = len(list(subject_graph.iter_shortest_paths(
                    match.forward[order[i]],
                    match.forward[order[(i+size//2)%size]]
                )))
                if count > 1:
                    ok = False
                    break
                count = len(list(subject_graph.iter_shortest_paths(
                    match.forward[order[i]],
                    match.forward[order[(i+size//2+1)%size]]
                )))
                if count > 1:
                    ok = False
                    break
            if ok:
                match.ring_vertices = tuple(match.forward[i] for i in order)
                #print "RingPattern.complete: found odd ring"
                return True
            #print "RingPattern.complete: no odd ring"
        # check whether we have an even strong ring
        paths = list(subject_graph.iter_shortest_paths(
            match.forward[size-1],
            match.forward[size-2]
        ))
        #print "RingPattern.complete: even paths", paths
        if (size > 3 and len(paths) == 1 and len(paths[0]) == 3) or \
           (size == 3 and len(paths) == 2 and len(paths[0]) == 3):
            path = paths[0]
            # For a three-ring, skip the path that runs through the start
            # vertex itself; use the other one.
            if size == 3 and path[1] == match.forward[0]:
                path = paths[1]
            # we have an even closed cycle. check if this is a strong ring
            match.add_relation(size, path[1])
            size += 1
            order = list(range(0, size, 2)) + list(range(size-1, 0, -2))
            ok = True
            for i in range(len(order)//2):
                # Opposite vertices in an even ring must be connected by
                # exactly two shortest paths (one along each arm).
                count = len(list(subject_graph.iter_shortest_paths(
                    match.forward[order[i]],
                    match.forward[order[(i+size//2)%size]]
                )))
                if count != 2:
                    ok = False
                    break
            if ok:
                # also check if this does not violate the requirement for a
                # unique origin:
                if match.forward[size-1] < match.forward[0]:
                    ok = False
            if not ok:
                # Roll back the tentative closure vertex added above.
                vertex1 = match.forward[size-1]
                del match.forward[size-1]
                del match.reverse[vertex1]
                size -= 1
                #print "RingPattern.complete: no even ring"
            else:
                match.ring_vertices = tuple(match.forward[i] for i in order)
                #print "RingPattern.complete: found even ring"
            return ok
        #print "RingPattern.complete: not at all"
        return False
1539
1540
class GraphSearch(object):
    """An algorithm that searches for all matches of a pattern in a graph

       Usage:

         >>> gs = GraphSearch(pattern)
         >>> for match in gs(graph):
         ...     print(match.forward)
    """

    def __init__(self, pattern, debug=False):
        """
           Arguments:
            | ``pattern``  --  A Pattern instance, describing the pattern to
                               look for
            | ``debug``  --  When true, debugging info is printed on screen
                             [default=False]
        """
        self.pattern = pattern
        # When truthy, self.debug also acts as the current indentation level
        # of the debug output (True counts as 1); see print_debug.
        self.debug = debug

    def __call__(self, subject_graph, one_match=False):
        """Iterator over all matches of self.pattern in the given graph.

           Arguments:
            | subject_graph  --  The subject_graph in which the matches
                                 according to self.pattern have to be found.
            | one_match --  If True, only one match will be returned. This
                            allows certain optimizations.
        """
        # Matches are grown iteratively.
        for vertex0, vertex1 in self.pattern.iter_initial_relations(subject_graph):
            init_match = self.pattern.MatchClass.from_first_relation(vertex0, vertex1)
            # init_match contains only one source -> dest relation. starting from
            # this initial match, the function iter_matches extends the match
            # in all possible ways and yields the completed matches
            for canonical_match in self._iter_matches(init_match, subject_graph, one_match):
                # Some patterns may exclude symmetrically equivalent matches as
                # to avoid duplicates. with such a 'canonical' solution,
                # the pattern is allowed to generate just those symmetrical
                # duplicates of interest.
                ifm = self.pattern.iter_final_matches(canonical_match, subject_graph, one_match)
                for final_match in ifm:
                    self.print_debug("final_match: %s" % final_match)
                    yield final_match
                    if one_match: return

    def print_debug(self, text, indent=0):
        """Only prints debug info on screen when self.debug == True.

           A positive ``indent`` increases the indentation level after
           printing (entering a nested call); a non-positive ``indent``
           adjusts the level before printing (leaving a nested call).
           The current level is kept in self.debug itself.
        """
        if self.debug:
            if indent > 0:
                # Entering a deeper level: print at the old indentation first.
                print(" "*self.debug, text)
            self.debug += indent
            if indent <= 0:
                # Same or shallower level: print at the updated indentation.
                print(" "*self.debug, text)

    def _iter_candidate_groups(self, init_match, edges0, edges1):
        """Divide the edges into groups"""
        # collect all end vertices0 and end vertices1 that belong to the same
        # group.
        sources = {}
        for start_vertex0, end_vertex0 in edges0:
            l = sources.setdefault(start_vertex0, [])
            l.append(end_vertex0)
        dests = {}
        for start_vertex1, end_vertex1 in edges1:
            start_vertex0 = init_match.reverse[start_vertex1]
            l = dests.setdefault(start_vertex0, [])
            l.append(end_vertex1)
        # Per shared pattern start vertex, yield the pattern end vertices
        # together with the subject end vertices they may be mapped to.
        for start_vertex0, end_vertices0 in sources.items():
            end_vertices1 = dests.get(start_vertex0, [])
            yield end_vertices0, end_vertices1


    def _iter_new_relations(self, init_match, subject_graph, edges0, constraints0, edges1):
        """Given an onset for a match, iterate over all possible new key-value pairs"""
        # Count the number of unique edges0[i][1] values. This is also
        # the number of new relations.
        num_new_relations = len(set(j for i, j in edges0))

        def combine_small(relations, num):
            """iterate over all compatible combinations within one set of relations"""
            if len(relations) == 0:
                return
            for i, pivot in enumerate(relations):
                if num == 1:
                    yield (pivot, )
                else:
                    # Restricting the tail to relations[:i] avoids generating
                    # permuted duplicates of the same combination; items that
                    # share a vertex with the pivot are incompatible.
                    compatible_relations = list(
                        item for item in relations[:i]
                        if pivot[0]!=item[0] and pivot[1]!=item[1]
                    )
                    for tail in combine_small(compatible_relations, num-1):
                        yield (pivot, ) + tail

        # generate candidate relations
        candidate_relations = []
        icg = self._iter_candidate_groups(init_match, edges0, edges1)
        for end_vertices0, end_vertices1 in icg:
            if len(end_vertices0) > len(end_vertices1):
                return # this can never work, the subject graph is 'too small'
            elif not self.pattern.sub and \
                 len(end_vertices0) != len(end_vertices1):
                return # an exact match is sought, this can never work
            l = []
            for end_vertex0 in end_vertices0:
                for end_vertex1 in end_vertices1:
                    if self.pattern.compare(end_vertex0, end_vertex1, subject_graph):
                        l.append((end_vertex0, end_vertex1))
            # len(end_vertices0) = the total number of relations that must be
            # made in this group
            if len(l) > 0:
                # turn l into a list of sets of internally compatible candidate
                # relations in this group
                l = list(combine_small(l, len(end_vertices0)))
                candidate_relations.append(l)
        if len(candidate_relations) == 0:
            return
        self.print_debug("candidate_relations: %s" % candidate_relations)

        def combine_big(pos=0):
            """Iterate over all possible sets of relations"""
            # pos is an index in candidate_relations
            crs = candidate_relations[pos]
            if pos == len(candidate_relations)-1:
                for relations in crs:
                    yield relations
            else:
                for tail in combine_big(pos+1):
                    for relations in crs:
                        yield relations + tail

        # final loop
        for new_relations in combine_big():
            new_relations = set(new_relations)
            self.print_debug("new_relations: %s" % (new_relations, ))
            # check the total number of new relations
            if len(new_relations) != num_new_relations:
                continue
            # check sanity of relations: the mapping must be one-to-one in
            # both directions (no pattern vertex maps to two subject
            # vertices and vice versa).
            forward = dict(new_relations)
            if len(forward) != num_new_relations:
                continue
            reverse = dict((j, i) for i, j in new_relations)
            if len(reverse) != num_new_relations:
                continue
            # check the constraints
            for a0, b0 in constraints0:
                if forward[a0] not in subject_graph.neighbors[forward[b0]]:
                    forward = None
                    break
            if forward is None:
                continue
            yield forward

    def _iter_matches(self, input_match, subject_graph, one_match, level=0):
        """Given an onset for a match, iterate over all completions of that match

           This iterator works recursively. At each level the match is extended
           with a new set of relations based on vertices in the pattern graph
           that are at a distances 'level' from the starting vertex
        """
        self.print_debug("ENTERING _ITER_MATCHES", 1)
        self.print_debug("input_match: %s" % input_match)
        # A) collect the new edges in the pattern graph and the subject graph
        # to extend the match.
        #
        # Note that the edges are ordered. edge[0] is always in the match.
        # edge[1] is never in the match. The constraints contain information
        # about the end points of edges0. It is a list of two-tuples where
        # (a, b) means that a and b must be connected.
        #
        # Second note: suffix 0 indicates the pattern graph and suffix 1
        # is used for the subject graph.
        edges0, constraints0 = self.pattern.get_new_edges(level)
        edges1 = input_match.get_new_edges(subject_graph)
        self.print_debug("edges0: %s" % edges0)
        self.print_debug("constraints0: %s" % constraints0)
        self.print_debug("edges1: %s" % edges1)

        # B) iterate over the sets of new relations: [(vertex0[i], vertex1[j]),
        # ...] that contain all endpoints of edges0, that satisfy the
        # constraints0 and where (vertex0[i], vertex1[j]) only occurs if these
        # are end points of a edge0 and edge1 whose starting points are already
        # in init_match. These conditions are implemented in an iterator as to
        # separate concerns. This iterator also calls the routines that check
        # whether vertex1[j] also satisfies additional conditions inherent
        # vertex0[i].
        inr = self._iter_new_relations(input_match, subject_graph, edges0,
                                       constraints0, edges1)
        for new_relations in inr:
            # for each set of new_relations, construct a next_match and recurse
            next_match = input_match.copy_with_new_relations(new_relations)
            if not self.pattern.check_next_match(next_match, new_relations, subject_graph, one_match):
                continue
            if self.pattern.complete(next_match, subject_graph):
                yield next_match
            else:
                for match in self._iter_matches(next_match, subject_graph, one_match, level+1):
                    yield match
        self.print_debug("LEAVING_ITER_MATCHES", -1)
1742