1 /* Graph algorithms
2 *
3 * Contents:
4 * 1. Maximum bipartite matching
5 * 2. Unit tests
6 * 3. Test driver
7 */
8
9 #include "esl_config.h"
10
11 #include <stdlib.h>
12 #include <stdio.h>
13
14 #include "easel.h"
15 #include "esl_matrixops.h"
16 #include "esl_vectorops.h"
17
18 /* Function: esl_graph_MaxBipartiteMatch()
19 * Synopsis: Maximum bipartite matching algorithm.
20 * Incept: SRE, Tue 26 Jun 2018
21 *
22 * Purpose: Find a maximum match for a bipartite graph. For two sets,
23 * one with <M> elements and the other with <N>, the <M> by
24 * <N> input adjacency matrix <A> defines $A_{ij} =$TRUE
25 * for allowed matches between the two sets, otherwise
26 * FALSE. The algorithm defines a maximal matching
27 * bipartite graph <G> with a subset of those edges.
28 *
29 * The total number of edges in <G> is returned in
30 * <*ret_edges>. By definition, it is $\leq \min(M,N)$;
31 * equality means a "perfect" match (especially for the
32 * case $M=N$).
33 *
34 * Optionally, the caller can also obtain <G> itself, by
35 * passing a non-NULL <opt_G> ptr. Edges in <G> are defined
36 * by $G_{ij} =$ TRUE or FALSE.
37 *
38 * Args: A : input adjacency matrix. A[i=0..M-1][j=0..N-1]
39 * M : number of elements in 1st set (rows in <A>)
40 * N : number of elements in 2nd set (cols in <A>)
41 * opt_G : optRETURN: maximal matching bipartite graph
42 * ret_nedges : RETURN: number of edges in <G>
43 *
44 * Returns: <eslOK> on success, *ret_edges is the number of edges in <G>,
45 * and *opt_G (if <&G> was passed) is a ptr to <G>.
46 *
47 * Throws: <eslEMEM> on allocation failure. Now <*ret_nedges> is 0 and
48 * <*opt_G>, if it was requested, is NULL.
49 *
50 * Notes: This is a simplified, specialized version of the
51 * Ford-Fulkerson maximum flow algorithm. $A_{ij}$ is
52 * treated as the capacity of directed edges $i \rightarrow
53 * j$, and the graph is augmented with a source and a sink
54 * vertex; source $\rightarrow i$ for all $i$, sink
55 * $\rightarrow j$ for all j, with implicit capacity of 1 on
56 * all these entry/exit edges.
57 */
58 int
esl_graph_MaxBipartiteMatch(int ** A,int M,int N,int *** opt_G,int * ret_nedges)59 esl_graph_MaxBipartiteMatch(int **A, int M, int N, int ***opt_G, int *ret_nedges)
60 {
61 int **G = NULL; // bipartite graph we're building, as a flow network. Gij = 1|0; 1 means i-j link.
62 int *Ga = NULL; // ... augmented with source -> i flow; Ga[0..M-1] = 1|0.
63 int *Gz = NULL; // ... and with j -> sink flow; Gz[0..N-1] = 1|0.
64 int *parent1 = NULL; // Parent in path for vertex in 1st set: parent1[i=0..M-1] = 0..N-1 (forward edges only); -1 (no edge yet)
65 int *parent2 = NULL; // Parent in path for vertex in 2nd set: parent2[j=0..N-1] = 0..M-1 (reverse edges); M (forward edge to sink); -1 (no edge yet)
66 int par0; // Parent for source in a new path
67 int found_path; // TRUE when we find a path that can increase flow
68 int done; // TRUE while breadth first search is still extending at least one path
69 int nedges = 0; // number of edges in G
70 int i,j;
71 int status;
72
73 /* Allocations. */
74 if (( G = esl_mat_ICreate(M, N) ) == NULL) { status = eslEMEM; goto ERROR; }
75 ESL_ALLOC(Ga, sizeof(int) * M);
76 ESL_ALLOC(Gz, sizeof(int) * N);
77 ESL_ALLOC(parent1, sizeof(int) * M);
78 ESL_ALLOC(parent2, sizeof(int) * N);
79
80 /* G is initialized with no edges. */
81 esl_vec_ISet(Ga, M, 0);
82 esl_vec_ISet(Gz, N, 0);
83 esl_mat_ISet(G, M, N, 0);
84
85 // Given the current G: can we identify a path that increases the overall flow?
86 while (1)
87 {
88 found_path = FALSE;
89 esl_vec_ISet(parent1, M, -1);
90 for (j = 0; j < N; j++) parent2[j] = Gz[j] ? -1 : M; // j->sink possible if the edge isn't used in G yet; it automatically has capacity of 1.
91
92 /* Breadth first search (Edmonds/Karp) to find an augmenting path, until there isn't one */
93 do {
94 done = TRUE; // until proven otherwise
95
96 for (j = 0; j < N; j++) // breadth-first search back to i from all active j's
97 if (parent2[j] != -1)
98 for (i = 0; i < M; i++)
99 if (parent1[i] == -1 && A[i][j] && ! G[i][j]) { parent1[i] = j; done = FALSE; break; } // can make forward link if 1) capacity and 2) not used in G yet.
100
101 for (i = 0; i < M; i++) // breadth-first search back to source from all active i's
102 if (parent1[i] != -1 && ! Ga[i]) { par0 = i; found_path = TRUE; break; }
103
104 for (i = 0; i < M; i++) // active i's can also go back a reverse link to j's
105 if (parent1[i] != -1)
106 for (j = 0; j < N; j++)
107 if (parent2[j] == -1 && G[i][j]) { parent2[j] = i; done = FALSE; break; }
108 } while (! found_path && ! done);
109
110 if (! found_path) break; // We're done. This is the only way.
111
112 // Now follow the path. Turn forward links on; turn reverse links off.
113 i = par0;
114 Ga[i] = 1;
115 while (1)
116 {
117 j = parent1[i]; G[i][j] = 1; nedges++; // add a forward edge
118 if (parent2[j] == N) { Gz[j] = 1; break; } // end path
119 i = parent2[j]; G[i][j] = 0; nedges--; // subtract a reverse edge
120 }
121 }
122 free(Ga); free(Gz);
123 free(parent1); free(parent2);
124 if (opt_G) *opt_G = G; else esl_mat_IDestroy(G);
125 *ret_nedges = nedges;
126 return eslOK;
127
128 ERROR:
129 esl_mat_IDestroy(G);
130 free(Ga); free(Gz);
131 free(parent1); free(parent2);
132 if (opt_G) *opt_G = NULL;
133 *ret_nedges = 0;
134 return status;
135 }
136
137
138 /*****************************************************************
139 * 2. Unit tests
140 *****************************************************************/
141 #ifdef eslGRAPH_TESTDRIVE
142
143 #include "esl_mixdchlet.h"
144
145 /* utest_perfect()
146 *
147 * Constructs a known <G0> as a perfect bipartite match, shuffled;
148 * then constructs <A> by adding a random number of extra edges to
149 * it. Infer <G> from <A>. The inferred <G> therefore should be
150 * perfect (nedges = n), and in the case of <= 1 extra added edge, <G0
151 * == G>.
152 */
153 static void
utest_perfect(ESL_RANDOMNESS * rng)154 utest_perfect(ESL_RANDOMNESS *rng)
155 {
156 char msg[] = "esl_graph utest_perfect failed";
157 int n = 1 + esl_rnd_Roll(rng, 20); // 1..20
158 int nextra = esl_rnd_Roll(rng, n*n-n); // 0..N^2-N-1
159 int *shuf = NULL;
160 int **G0 = esl_mat_ICreate(n, n);
161 int **A = esl_mat_ICreate(n, n);
162 int **G = NULL;
163 int ntot = n; // number of edges in A
164 int nedges; // number of edges in G
165 int i,j,e;
166
167 if ((shuf = malloc(sizeof(int) * n)) == NULL) esl_fatal(msg);
168 for (i = 0; i < n; i++) shuf[i] = i;
169 esl_vec_IShuffle(rng, shuf, n);
170
171 esl_mat_ISet(G0, n, n, 0);
172 for (i = 0; i < n; i++)
173 G0[i][shuf[i]] = TRUE;
174
175 esl_mat_ICopy(G0, n, n, A);
176 for (e = 0; e < nextra; e++)
177 {
178 i = esl_rnd_Roll(rng, n);
179 j = esl_rnd_Roll(rng, n);
180 if (! A[i][j]) ntot++;
181 A[i][j] = TRUE;
182 }
183
184 esl_graph_MaxBipartiteMatch(A, n, n, &G, &nedges);
185 if (nedges != n) esl_fatal(msg);
186 if (ntot <= n+1 && esl_mat_ICompare(G, G0, n, n) != eslOK) esl_fatal(msg);
187
188 free(shuf);
189 esl_mat_IDestroy(A);
190 esl_mat_IDestroy(G);
191 esl_mat_IDestroy(G0);
192 }
193 #endif // eslGRAPH_TESTDRIVE
194
195 /*****************************************************************
196 * 3. Test driver
197 *****************************************************************/
198 #ifdef eslGRAPH_TESTDRIVE
199
200 #include "easel.h"
201 #include "esl_getopts.h"
202 #include "esl_random.h"
203
204 static ESL_OPTIONS options[] = {
205 /* name type default env range togs reqs incomp help docgrp */
206 {"-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage", 0},
207 {"-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0},
208 { 0,0,0,0,0,0,0,0,0,0},
209 };
210 static char usage[] = "[-options]";
211 static char banner[] = "test driver for graph module";
212
213 int
main(int argc,char ** argv)214 main(int argc, char **argv)
215 {
216 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
217 ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
218
219 fprintf(stderr, "## %s\n", argv[0]);
220 fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
221
222 utest_perfect(rng);
223
224 fprintf(stderr, "# status = ok\n");
225 esl_randomness_Destroy(rng);
226 esl_getopts_Destroy(go);
227 return 0;
228 }
229 #endif // eslGRAPH_TESTDRIVE
230
231
232