1"""Functions for finding and evaluating cuts in a graph.
2
3"""
4
5from itertools import chain
6
7import networkx as nx
8
9__all__ = [
10    "boundary_expansion",
11    "conductance",
12    "cut_size",
13    "edge_expansion",
14    "mixing_expansion",
15    "node_expansion",
16    "normalized_cut_size",
17    "volume",
18]
19
20
21# TODO STILL NEED TO UPDATE ALL THE DOCUMENTATION!
22
23
24def cut_size(G, S, T=None, weight=None):
25    """Returns the size of the cut between two sets of nodes.
26
27    A *cut* is a partition of the nodes of a graph into two sets. The
28    *cut size* is the sum of the weights of the edges "between" the two
29    sets of nodes.
30
31    Parameters
32    ----------
33    G : NetworkX graph
34
35    S : collection
36        A collection of nodes in `G`.
37
38    T : collection
39        A collection of nodes in `G`. If not specified, this is taken to
40        be the set complement of `S`.
41
42    weight : object
43        Edge attribute key to use as weight. If not specified, edges
44        have weight one.
45
46    Returns
47    -------
48    number
49        Total weight of all edges from nodes in set `S` to nodes in
50        set `T` (and, in the case of directed graphs, all edges from
51        nodes in `T` to nodes in `S`).
52
53    Examples
54    --------
55    In the graph with two cliques joined by a single edges, the natural
56    bipartition of the graph into two blocks, one for each clique,
57    yields a cut of weight one::
58
59        >>> G = nx.barbell_graph(3, 0)
60        >>> S = {0, 1, 2}
61        >>> T = {3, 4, 5}
62        >>> nx.cut_size(G, S, T)
63        1
64
65    Each parallel edge in a multigraph is counted when determining the
66    cut size::
67
68        >>> G = nx.MultiGraph(["ab", "ab"])
69        >>> S = {"a"}
70        >>> T = {"b"}
71        >>> nx.cut_size(G, S, T)
72        2
73
74    Notes
75    -----
76    In a multigraph, the cut size is the total weight of edges including
77    multiplicity.
78
79    """
80    edges = nx.edge_boundary(G, S, T, data=weight, default=1)
81    if G.is_directed():
82        edges = chain(edges, nx.edge_boundary(G, T, S, data=weight, default=1))
83    return sum(weight for u, v, weight in edges)
84
85
86def volume(G, S, weight=None):
87    """Returns the volume of a set of nodes.
88
89    The *volume* of a set *S* is the sum of the (out-)degrees of nodes
90    in *S* (taking into account parallel edges in multigraphs). [1]
91
92    Parameters
93    ----------
94    G : NetworkX graph
95
96    S : collection
97        A collection of nodes in `G`.
98
99    weight : object
100        Edge attribute key to use as weight. If not specified, edges
101        have weight one.
102
103    Returns
104    -------
105    number
106        The volume of the set of nodes represented by `S` in the graph
107        `G`.
108
109    See also
110    --------
111    conductance
112    cut_size
113    edge_expansion
114    edge_boundary
115    normalized_cut_size
116
117    References
118    ----------
119    .. [1] David Gleich.
120           *Hierarchical Directed Spectral Graph Partitioning*.
121           <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
122
123    """
124    degree = G.out_degree if G.is_directed() else G.degree
125    return sum(d for v, d in degree(S, weight=weight))
126
127
128def normalized_cut_size(G, S, T=None, weight=None):
129    """Returns the normalized size of the cut between two sets of nodes.
130
131    The *normalized cut size* is the cut size times the sum of the
132    reciprocal sizes of the volumes of the two sets. [1]
133
134    Parameters
135    ----------
136    G : NetworkX graph
137
138    S : collection
139        A collection of nodes in `G`.
140
141    T : collection
142        A collection of nodes in `G`.
143
144    weight : object
145        Edge attribute key to use as weight. If not specified, edges
146        have weight one.
147
148    Returns
149    -------
150    number
151        The normalized cut size between the two sets `S` and `T`.
152
153    Notes
154    -----
155    In a multigraph, the cut size is the total weight of edges including
156    multiplicity.
157
158    See also
159    --------
160    conductance
161    cut_size
162    edge_expansion
163    volume
164
165    References
166    ----------
167    .. [1] David Gleich.
168           *Hierarchical Directed Spectral Graph Partitioning*.
169           <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
170
171    """
172    if T is None:
173        T = set(G) - set(S)
174    num_cut_edges = cut_size(G, S, T=T, weight=weight)
175    volume_S = volume(G, S, weight=weight)
176    volume_T = volume(G, T, weight=weight)
177    return num_cut_edges * ((1 / volume_S) + (1 / volume_T))
178
179
180def conductance(G, S, T=None, weight=None):
181    """Returns the conductance of two sets of nodes.
182
183    The *conductance* is the quotient of the cut size and the smaller of
184    the volumes of the two sets. [1]
185
186    Parameters
187    ----------
188    G : NetworkX graph
189
190    S : collection
191        A collection of nodes in `G`.
192
193    T : collection
194        A collection of nodes in `G`.
195
196    weight : object
197        Edge attribute key to use as weight. If not specified, edges
198        have weight one.
199
200    Returns
201    -------
202    number
203        The conductance between the two sets `S` and `T`.
204
205    See also
206    --------
207    cut_size
208    edge_expansion
209    normalized_cut_size
210    volume
211
212    References
213    ----------
214    .. [1] David Gleich.
215           *Hierarchical Directed Spectral Graph Partitioning*.
216           <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
217
218    """
219    if T is None:
220        T = set(G) - set(S)
221    num_cut_edges = cut_size(G, S, T, weight=weight)
222    volume_S = volume(G, S, weight=weight)
223    volume_T = volume(G, T, weight=weight)
224    return num_cut_edges / min(volume_S, volume_T)
225
226
227def edge_expansion(G, S, T=None, weight=None):
228    """Returns the edge expansion between two node sets.
229
230    The *edge expansion* is the quotient of the cut size and the smaller
231    of the cardinalities of the two sets. [1]
232
233    Parameters
234    ----------
235    G : NetworkX graph
236
237    S : collection
238        A collection of nodes in `G`.
239
240    T : collection
241        A collection of nodes in `G`.
242
243    weight : object
244        Edge attribute key to use as weight. If not specified, edges
245        have weight one.
246
247    Returns
248    -------
249    number
250        The edge expansion between the two sets `S` and `T`.
251
252    See also
253    --------
254    boundary_expansion
255    mixing_expansion
256    node_expansion
257
258    References
259    ----------
260    .. [1] Fan Chung.
261           *Spectral Graph Theory*.
262           (CBMS Regional Conference Series in Mathematics, No. 92),
263           American Mathematical Society, 1997, ISBN 0-8218-0315-8
264           <http://www.math.ucsd.edu/~fan/research/revised.html>
265
266    """
267    if T is None:
268        T = set(G) - set(S)
269    num_cut_edges = cut_size(G, S, T=T, weight=weight)
270    return num_cut_edges / min(len(S), len(T))
271
272
273def mixing_expansion(G, S, T=None, weight=None):
274    """Returns the mixing expansion between two node sets.
275
276    The *mixing expansion* is the quotient of the cut size and twice the
277    number of edges in the graph. [1]
278
279    Parameters
280    ----------
281    G : NetworkX graph
282
283    S : collection
284        A collection of nodes in `G`.
285
286    T : collection
287        A collection of nodes in `G`.
288
289    weight : object
290        Edge attribute key to use as weight. If not specified, edges
291        have weight one.
292
293    Returns
294    -------
295    number
296        The mixing expansion between the two sets `S` and `T`.
297
298    See also
299    --------
300    boundary_expansion
301    edge_expansion
302    node_expansion
303
304    References
305    ----------
306    .. [1] Vadhan, Salil P.
307           "Pseudorandomness."
308           *Foundations and Trends
309           in Theoretical Computer Science* 7.1–3 (2011): 1–336.
310           <https://doi.org/10.1561/0400000010>
311
312    """
313    num_cut_edges = cut_size(G, S, T=T, weight=weight)
314    num_total_edges = G.number_of_edges()
315    return num_cut_edges / (2 * num_total_edges)
316
317
318# TODO What is the generalization to two arguments, S and T? Does the
319# denominator become `min(len(S), len(T))`?
320def node_expansion(G, S):
321    """Returns the node expansion of the set `S`.
322
323    The *node expansion* is the quotient of the size of the node
324    boundary of *S* and the cardinality of *S*. [1]
325
326    Parameters
327    ----------
328    G : NetworkX graph
329
330    S : collection
331        A collection of nodes in `G`.
332
333    Returns
334    -------
335    number
336        The node expansion of the set `S`.
337
338    See also
339    --------
340    boundary_expansion
341    edge_expansion
342    mixing_expansion
343
344    References
345    ----------
346    .. [1] Vadhan, Salil P.
347           "Pseudorandomness."
348           *Foundations and Trends
349           in Theoretical Computer Science* 7.1–3 (2011): 1–336.
350           <https://doi.org/10.1561/0400000010>
351
352    """
353    neighborhood = set(chain.from_iterable(G.neighbors(v) for v in S))
354    return len(neighborhood) / len(S)
355
356
357# TODO What is the generalization to two arguments, S and T? Does the
358# denominator become `min(len(S), len(T))`?
359def boundary_expansion(G, S):
360    """Returns the boundary expansion of the set `S`.
361
362    The *boundary expansion* is the quotient of the size
363    of the node boundary and the cardinality of *S*. [1]
364
365    Parameters
366    ----------
367    G : NetworkX graph
368
369    S : collection
370        A collection of nodes in `G`.
371
372    Returns
373    -------
374    number
375        The boundary expansion of the set `S`.
376
377    See also
378    --------
379    edge_expansion
380    mixing_expansion
381    node_expansion
382
383    References
384    ----------
385    .. [1] Vadhan, Salil P.
386           "Pseudorandomness."
387           *Foundations and Trends in Theoretical Computer Science*
388           7.1–3 (2011): 1–336.
389           <https://doi.org/10.1561/0400000010>
390
391    """
392    return len(nx.node_boundary(G, S)) / len(S)
393