1"""Functions for finding and manipulating cliques.
2
3Finding the largest clique in a graph is NP-complete problem, so most of
4these algorithms have an exponential running time; for more information,
5see the Wikipedia article on the clique problem [1]_.
6
7.. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem
8
9"""
10from collections import deque
11from itertools import chain
12from itertools import combinations
13from itertools import islice
14import networkx as nx
15from networkx.utils import not_implemented_for
16
17
18__all__ = [
19    "find_cliques",
20    "find_cliques_recursive",
21    "make_max_clique_graph",
22    "make_clique_bipartite",
23    "graph_clique_number",
24    "graph_number_of_cliques",
25    "node_clique_number",
26    "number_of_cliques",
27    "cliques_containing_node",
28    "enumerate_all_cliques",
29    "max_weight_clique",
30]
31
32
33@not_implemented_for("directed")
34def enumerate_all_cliques(G):
35    """Returns all cliques in an undirected graph.
36
37    This function returns an iterator over cliques, each of which is a
38    list of nodes. The iteration is ordered by cardinality of the
39    cliques: first all cliques of size one, then all cliques of size
40    two, etc.
41
42    Parameters
43    ----------
44    G : NetworkX graph
45        An undirected graph.
46
47    Returns
48    -------
49    iterator
50        An iterator over cliques, each of which is a list of nodes in
51        `G`. The cliques are ordered according to size.
52
53    Notes
54    -----
55    To obtain a list of all cliques, use
56    `list(enumerate_all_cliques(G))`. However, be aware that in the
57    worst-case, the length of this list can be exponential in the number
58    of nodes in the graph (for example, when the graph is the complete
59    graph). This function avoids storing all cliques in memory by only
60    keeping current candidate node lists in memory during its search.
61
62    The implementation is adapted from the algorithm by Zhang, et
63    al. (2005) [1]_ to output all cliques discovered.
64
65    This algorithm ignores self-loops and parallel edges, since cliques
66    are not conventionally defined with such edges.
67
68    References
69    ----------
70    .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J.,
71           Langston, M.A., Samatova, N.F.,
72           "Genome-Scale Computational Approaches to Memory-Intensive
73           Applications in Systems Biology".
74           *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005
75           Conference, pp. 12, 12--18 Nov. 2005.
76           <https://doi.org/10.1109/SC.2005.29>.
77
78    """
79    index = {}
80    nbrs = {}
81    for u in G:
82        index[u] = len(index)
83        # Neighbors of u that appear after u in the iteration order of G.
84        nbrs[u] = {v for v in G[u] if v not in index}
85
86    queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G)
87    # Loop invariants:
88    # 1. len(base) is nondecreasing.
89    # 2. (base + cnbrs) is sorted with respect to the iteration order of G.
90    # 3. cnbrs is a set of common neighbors of nodes in base.
91    while queue:
92        base, cnbrs = map(list, queue.popleft())
93        yield base
94        for i, u in enumerate(cnbrs):
95            # Use generators to reduce memory consumption.
96            queue.append(
97                (
98                    chain(base, [u]),
99                    filter(nbrs[u].__contains__, islice(cnbrs, i + 1, None)),
100                )
101            )
102
103
104@not_implemented_for("directed")
105def find_cliques(G):
106    """Returns all maximal cliques in an undirected graph.
107
108    For each node *v*, a *maximal clique for v* is a largest complete
109    subgraph containing *v*. The largest maximal clique is sometimes
110    called the *maximum clique*.
111
112    This function returns an iterator over cliques, each of which is a
113    list of nodes. It is an iterative implementation, so should not
114    suffer from recursion depth issues.
115
116    Parameters
117    ----------
118    G : NetworkX graph
119        An undirected graph.
120
121    Returns
122    -------
123    iterator
124        An iterator over maximal cliques, each of which is a list of
125        nodes in `G`. The order of cliques is arbitrary.
126
127    See Also
128    --------
129    find_cliques_recursive
130        A recursive version of the same algorithm.
131
132    Notes
133    -----
134    To obtain a list of all maximal cliques, use
135    `list(find_cliques(G))`. However, be aware that in the worst-case,
136    the length of this list can be exponential in the number of nodes in
137    the graph. This function avoids storing all cliques in memory by
138    only keeping current candidate node lists in memory during its search.
139
140    This implementation is based on the algorithm published by Bron and
141    Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
142    (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It
143    essentially unrolls the recursion used in the references to avoid
144    issues of recursion stack depth (for a recursive implementation, see
145    :func:`find_cliques_recursive`).
146
147    This algorithm ignores self-loops and parallel edges, since cliques
148    are not conventionally defined with such edges.
149
150    References
151    ----------
152    .. [1] Bron, C. and Kerbosch, J.
153       "Algorithm 457: finding all cliques of an undirected graph".
154       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
155       <http://portal.acm.org/citation.cfm?doid=362342.362367>
156
157    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
158       "The worst-case time complexity for generating all maximal
159       cliques and computational experiments",
160       *Theoretical Computer Science*, Volume 363, Issue 1,
161       Computing and Combinatorics,
162       10th Annual International Conference on
163       Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
164       <https://doi.org/10.1016/j.tcs.2006.06.015>
165
166    .. [3] F. Cazals, C. Karande,
167       "A note on the problem of reporting maximal cliques",
168       *Theoretical Computer Science*,
169       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
170       <https://doi.org/10.1016/j.tcs.2008.05.010>
171
172    """
173    if len(G) == 0:
174        return
175
176    adj = {u: {v for v in G[u] if v != u} for u in G}
177    Q = [None]
178
179    subg = set(G)
180    cand = set(G)
181    u = max(subg, key=lambda u: len(cand & adj[u]))
182    ext_u = cand - adj[u]
183    stack = []
184
185    try:
186        while True:
187            if ext_u:
188                q = ext_u.pop()
189                cand.remove(q)
190                Q[-1] = q
191                adj_q = adj[q]
192                subg_q = subg & adj_q
193                if not subg_q:
194                    yield Q[:]
195                else:
196                    cand_q = cand & adj_q
197                    if cand_q:
198                        stack.append((subg, cand, ext_u))
199                        Q.append(None)
200                        subg = subg_q
201                        cand = cand_q
202                        u = max(subg, key=lambda u: len(cand & adj[u]))
203                        ext_u = cand - adj[u]
204            else:
205                Q.pop()
206                subg, cand, ext_u = stack.pop()
207    except IndexError:
208        pass
209
210
211# TODO Should this also be not implemented for directed graphs?
212def find_cliques_recursive(G):
213    """Returns all maximal cliques in a graph.
214
215    For each node *v*, a *maximal clique for v* is a largest complete
216    subgraph containing *v*. The largest maximal clique is sometimes
217    called the *maximum clique*.
218
219    This function returns an iterator over cliques, each of which is a
220    list of nodes. It is a recursive implementation, so may suffer from
221    recursion depth issues.
222
223    Parameters
224    ----------
225    G : NetworkX graph
226
227    Returns
228    -------
229    iterator
230        An iterator over maximal cliques, each of which is a list of
231        nodes in `G`. The order of cliques is arbitrary.
232
233    See Also
234    --------
235    find_cliques
236        An iterative version of the same algorithm.
237
238    Notes
239    -----
240    To obtain a list of all maximal cliques, use
241    `list(find_cliques_recursive(G))`. However, be aware that in the
242    worst-case, the length of this list can be exponential in the number
243    of nodes in the graph. This function avoids storing all cliques in memory
244    by only keeping current candidate node lists in memory during its search.
245
246    This implementation is based on the algorithm published by Bron and
247    Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
248    (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a
249    non-recursive implementation, see :func:`find_cliques`.
250
251    This algorithm ignores self-loops and parallel edges, since cliques
252    are not conventionally defined with such edges.
253
254    References
255    ----------
256    .. [1] Bron, C. and Kerbosch, J.
257       "Algorithm 457: finding all cliques of an undirected graph".
258       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
259       <http://portal.acm.org/citation.cfm?doid=362342.362367>
260
261    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
262       "The worst-case time complexity for generating all maximal
263       cliques and computational experiments",
264       *Theoretical Computer Science*, Volume 363, Issue 1,
265       Computing and Combinatorics,
266       10th Annual International Conference on
267       Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
268       <https://doi.org/10.1016/j.tcs.2006.06.015>
269
270    .. [3] F. Cazals, C. Karande,
271       "A note on the problem of reporting maximal cliques",
272       *Theoretical Computer Science*,
273       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
274       <https://doi.org/10.1016/j.tcs.2008.05.010>
275
276    """
277    if len(G) == 0:
278        return iter([])
279
280    adj = {u: {v for v in G[u] if v != u} for u in G}
281    Q = []
282
283    def expand(subg, cand):
284        u = max(subg, key=lambda u: len(cand & adj[u]))
285        for q in cand - adj[u]:
286            cand.remove(q)
287            Q.append(q)
288            adj_q = adj[q]
289            subg_q = subg & adj_q
290            if not subg_q:
291                yield Q[:]
292            else:
293                cand_q = cand & adj_q
294                if cand_q:
295                    yield from expand(subg_q, cand_q)
296            Q.pop()
297
298    return expand(set(G), set(G))
299
300
301def make_max_clique_graph(G, create_using=None):
302    """Returns the maximal clique graph of the given graph.
303
304    The nodes of the maximal clique graph of `G` are the cliques of
305    `G` and an edge joins two cliques if the cliques are not disjoint.
306
307    Parameters
308    ----------
309    G : NetworkX graph
310
311    create_using : NetworkX graph constructor, optional (default=nx.Graph)
312       Graph type to create. If graph instance, then cleared before populated.
313
314    Returns
315    -------
316    NetworkX graph
317        A graph whose nodes are the cliques of `G` and whose edges
318        join two cliques if they are not disjoint.
319
320    Notes
321    -----
322    This function behaves like the following code::
323
324        import networkx as nx
325        G = nx.make_clique_bipartite(G)
326        cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0]
327        G = nx.bipartite.project(G, cliques)
328        G = nx.relabel_nodes(G, {-v: v - 1 for v in G})
329
330    It should be faster, though, since it skips all the intermediate
331    steps.
332
333    """
334    if create_using is None:
335        B = G.__class__()
336    else:
337        B = nx.empty_graph(0, create_using)
338    cliques = list(enumerate(set(c) for c in find_cliques(G)))
339    # Add a numbered node for each clique.
340    B.add_nodes_from(i for i, c in cliques)
341    # Join cliques by an edge if they share a node.
342    clique_pairs = combinations(cliques, 2)
343    B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2)
344    return B
345
346
347def make_clique_bipartite(G, fpos=None, create_using=None, name=None):
348    """Returns the bipartite clique graph corresponding to `G`.
349
350    In the returned bipartite graph, the "bottom" nodes are the nodes of
351    `G` and the "top" nodes represent the maximal cliques of `G`.
352    There is an edge from node *v* to clique *C* in the returned graph
353    if and only if *v* is an element of *C*.
354
355    Parameters
356    ----------
357    G : NetworkX graph
358        An undirected graph.
359
360    fpos : bool
361        If True or not None, the returned graph will have an
362        additional attribute, `pos`, a dictionary mapping node to
363        position in the Euclidean plane.
364
365    create_using : NetworkX graph constructor, optional (default=nx.Graph)
366       Graph type to create. If graph instance, then cleared before populated.
367
368    Returns
369    -------
370    NetworkX graph
371        A bipartite graph whose "bottom" set is the nodes of the graph
372        `G`, whose "top" set is the cliques of `G`, and whose edges
373        join nodes of `G` to the cliques that contain them.
374
375        The nodes of the graph `G` have the node attribute
376        'bipartite' set to 1 and the nodes representing cliques
377        have the node attribute 'bipartite' set to 0, as is the
378        convention for bipartite graphs in NetworkX.
379
380    """
381    B = nx.empty_graph(0, create_using)
382    B.clear()
383    # The "bottom" nodes in the bipartite graph are the nodes of the
384    # original graph, G.
385    B.add_nodes_from(G, bipartite=1)
386    for i, cl in enumerate(find_cliques(G)):
387        # The "top" nodes in the bipartite graph are the cliques. These
388        # nodes get negative numbers as labels.
389        name = -i - 1
390        B.add_node(name, bipartite=0)
391        B.add_edges_from((v, name) for v in cl)
392    return B
393
394
395def graph_clique_number(G, cliques=None):
396    """Returns the clique number of the graph.
397
398    The *clique number* of a graph is the size of the largest clique in
399    the graph.
400
401    Parameters
402    ----------
403    G : NetworkX graph
404        An undirected graph.
405
406    cliques : list
407        A list of cliques, each of which is itself a list of nodes. If
408        not specified, the list of all cliques will be computed, as by
409        :func:`find_cliques`.
410
411    Returns
412    -------
413    int
414        The size of the largest clique in `G`.
415
416    Notes
417    -----
418    You should provide `cliques` if you have already computed the list
419    of maximal cliques, in order to avoid an exponential time search for
420    maximal cliques.
421
422    """
423    if len(G.nodes) < 1:
424        return 0
425    if cliques is None:
426        cliques = find_cliques(G)
427    return max([len(c) for c in cliques] or [1])
428
429
430def graph_number_of_cliques(G, cliques=None):
431    """Returns the number of maximal cliques in the graph.
432
433    Parameters
434    ----------
435    G : NetworkX graph
436        An undirected graph.
437
438    cliques : list
439        A list of cliques, each of which is itself a list of nodes. If
440        not specified, the list of all cliques will be computed, as by
441        :func:`find_cliques`.
442
443    Returns
444    -------
445    int
446        The number of maximal cliques in `G`.
447
448    Notes
449    -----
450    You should provide `cliques` if you have already computed the list
451    of maximal cliques, in order to avoid an exponential time search for
452    maximal cliques.
453
454    """
455    if cliques is None:
456        cliques = list(find_cliques(G))
457    return len(cliques)
458
459
460def node_clique_number(G, nodes=None, cliques=None):
461    """Returns the size of the largest maximal clique containing
462    each given node.
463
464    Returns a single or list depending on input nodes.
465    Optional list of cliques can be input if already computed.
466    """
467    if cliques is None:
468        if nodes is not None:
469            # Use ego_graph to decrease size of graph
470            if isinstance(nodes, list):
471                d = {}
472                for n in nodes:
473                    H = nx.ego_graph(G, n)
474                    d[n] = max(len(c) for c in find_cliques(H))
475            else:
476                H = nx.ego_graph(G, nodes)
477                d = max(len(c) for c in find_cliques(H))
478            return d
479        # nodes is None--find all cliques
480        cliques = list(find_cliques(G))
481
482    if nodes is None:
483        nodes = list(G.nodes())  # none, get entire graph
484
485    if not isinstance(nodes, list):  # check for a list
486        v = nodes
487        # assume it is a single value
488        d = max([len(c) for c in cliques if v in c])
489    else:
490        d = {}
491        for v in nodes:
492            d[v] = max([len(c) for c in cliques if v in c])
493    return d
494
495    # if nodes is None:                 # none, use entire graph
496    #     nodes=G.nodes()
497    # elif  not isinstance(nodes, list):    # check for a list
498    #     nodes=[nodes]             # assume it is a single value
499
500    # if cliques is None:
501    #     cliques=list(find_cliques(G))
502    # d={}
503    # for v in nodes:
504    #     d[v]=max([len(c) for c in cliques if v in c])
505
506    # if nodes in G:
507    #     return d[v] #return single value
508    # return d
509
510
511def number_of_cliques(G, nodes=None, cliques=None):
512    """Returns the number of maximal cliques for each node.
513
514    Returns a single or list depending on input nodes.
515    Optional list of cliques can be input if already computed.
516    """
517    if cliques is None:
518        cliques = list(find_cliques(G))
519
520    if nodes is None:
521        nodes = list(G.nodes())  # none, get entire graph
522
523    if not isinstance(nodes, list):  # check for a list
524        v = nodes
525        # assume it is a single value
526        numcliq = len([1 for c in cliques if v in c])
527    else:
528        numcliq = {}
529        for v in nodes:
530            numcliq[v] = len([1 for c in cliques if v in c])
531    return numcliq
532
533
534def cliques_containing_node(G, nodes=None, cliques=None):
535    """Returns a list of cliques containing the given node.
536
537    Returns a single list or list of lists depending on input nodes.
538    Optional list of cliques can be input if already computed.
539    """
540    if cliques is None:
541        cliques = list(find_cliques(G))
542
543    if nodes is None:
544        nodes = list(G.nodes())  # none, get entire graph
545
546    if not isinstance(nodes, list):  # check for a list
547        v = nodes
548        # assume it is a single value
549        vcliques = [c for c in cliques if v in c]
550    else:
551        vcliques = {}
552        for v in nodes:
553            vcliques[v] = [c for c in cliques if v in c]
554    return vcliques
555
556
557class MaxWeightClique(object):
558    """A class for the maximum weight clique algorithm.
559
560    This class is a helper for the `max_weight_clique` function.  The class
561    should not normally be used directly.
562
563    Parameters
564    ----------
565    G : NetworkX graph
566        The undirected graph for which a maximum weight clique is sought
567    weight : string or None, optional (default='weight')
568        The node attribute that holds the integer value used as a weight.
569        If None, then each node has weight 1.
570
571    Attributes
572    ----------
573    G : NetworkX graph
574        The undirected graph for which a maximum weight clique is sought
575    node_weights: dict
576        The weight of each node
577    incumbent_nodes : list
578        The nodes of the incumbent clique (the best clique found so far)
579    incumbent_weight: int
580        The weight of the incumbent clique
581    """
582
583    def __init__(self, G, weight):
584        self.G = G
585        self.incumbent_nodes = []
586        self.incumbent_weight = 0
587
588        if weight is None:
589            self.node_weights = {v: 1 for v in G.nodes()}
590        else:
591            for v in G.nodes():
592                if weight not in G.nodes[v]:
593                    errmsg = f"Node {v!r} does not have the requested weight field."
594                    raise KeyError(errmsg)
595                if not isinstance(G.nodes[v][weight], int):
596                    errmsg = f"The {weight!r} field of node {v!r} is not an integer."
597                    raise ValueError(errmsg)
598            self.node_weights = {v: G.nodes[v][weight] for v in G.nodes()}
599
600    def update_incumbent_if_improved(self, C, C_weight):
601        """Update the incumbent if the node set C has greater weight.
602
603        C is assumed to be a clique.
604        """
605        if C_weight > self.incumbent_weight:
606            self.incumbent_nodes = C[:]
607            self.incumbent_weight = C_weight
608
609    def greedily_find_independent_set(self, P):
610        """Greedily find an independent set of nodes from a set of
611        nodes P."""
612        independent_set = []
613        P = P[:]
614        while P:
615            v = P[0]
616            independent_set.append(v)
617            P = [w for w in P if v != w and not self.G.has_edge(v, w)]
618        return independent_set
619
620    def find_branching_nodes(self, P, target):
621        """Find a set of nodes to branch on."""
622        residual_wt = {v: self.node_weights[v] for v in P}
623        total_wt = 0
624        P = P[:]
625        while P:
626            independent_set = self.greedily_find_independent_set(P)
627            min_wt_in_class = min(residual_wt[v] for v in independent_set)
628            total_wt += min_wt_in_class
629            if total_wt > target:
630                break
631            for v in independent_set:
632                residual_wt[v] -= min_wt_in_class
633            P = [v for v in P if residual_wt[v] != 0]
634        return P
635
636    def expand(self, C, C_weight, P):
637        """Look for the best clique that contains all the nodes in C and zero or
638        more of the nodes in P, backtracking if it can be shown that no such
639        clique has greater weight than the incumbent.
640        """
641        self.update_incumbent_if_improved(C, C_weight)
642        branching_nodes = self.find_branching_nodes(P, self.incumbent_weight - C_weight)
643        while branching_nodes:
644            v = branching_nodes.pop()
645            P.remove(v)
646            new_C = C + [v]
647            new_C_weight = C_weight + self.node_weights[v]
648            new_P = [w for w in P if self.G.has_edge(v, w)]
649            self.expand(new_C, new_C_weight, new_P)
650
651    def find_max_weight_clique(self):
652        """Find a maximum weight clique."""
653        # Sort nodes in reverse order of degree for speed
654        nodes = sorted(self.G.nodes(), key=lambda v: self.G.degree(v), reverse=True)
655        nodes = [v for v in nodes if self.node_weights[v] > 0]
656        self.expand([], 0, nodes)
657
658
659@not_implemented_for("directed")
660def max_weight_clique(G, weight="weight"):
661    """Find a maximum weight clique in G.
662
663    A *clique* in a graph is a set of nodes such that every two distinct nodes
664    are adjacent.  The *weight* of a clique is the sum of the weights of its
665    nodes.  A *maximum weight clique* of graph G is a clique C in G such that
666    no clique in G has weight greater than the weight of C.
667
668    Parameters
669    ----------
670    G : NetworkX graph
671        Undirected graph
672    weight : string or None, optional (default='weight')
673        The node attribute that holds the integer value used as a weight.
674        If None, then each node has weight 1.
675
676    Returns
677    -------
678    clique : list
679        the nodes of a maximum weight clique
680    weight : int
681        the weight of a maximum weight clique
682
683    Notes
684    -----
685    The implementation is recursive, and therefore it may run into recursion
686    depth issues if G contains a clique whose number of nodes is close to the
687    recursion depth limit.
688
689    At each search node, the algorithm greedily constructs a weighted
690    independent set cover of part of the graph in order to find a small set of
691    nodes on which to branch.  The algorithm is very similar to the algorithm
692    of Tavares et al. [1]_, other than the fact that the NetworkX version does
693    not use bitsets.  This style of algorithm for maximum weight clique (and
694    maximum weight independent set, which is the same problem but on the
695    complement graph) has a decades-long history.  See Algorithm B of Warren
696    and Hicks [2]_ and the references in that paper.
697
698    References
699    ----------
700    .. [1] Tavares, W.A., Neto, M.B.C., Rodrigues, C.D., Michelon, P.: Um
701           algoritmo de branch and bound para o problema da clique máxima
702           ponderada.  Proceedings of XLVII SBPO 1 (2015).
703
704    .. [2] Warrent, Jeffrey S, Hicks, Illya V.: Combinatorial Branch-and-Bound
705           for the Maximum Weight Independent Set Problem.  Technical Report,
706           Texas A&M University (2016).
707    """
708
709    mwc = MaxWeightClique(G, weight)
710    mwc.find_max_weight_clique()
711    return mwc.incumbent_nodes, mwc.incumbent_weight
712