1 /* Graph algorithms
2  *
3  * Contents:
4  *    1. Maximum bipartite matching
5  *    2. Unit tests
6  *    3. Test driver
7  */
8 
9 #include "esl_config.h"
10 
11 #include <stdlib.h>
12 #include <stdio.h>
13 
14 #include "easel.h"
15 #include "esl_matrixops.h"
16 #include "esl_vectorops.h"
17 
18 /* Function:  esl_graph_MaxBipartiteMatch()
19  * Synopsis:  Maximum bipartite matching algorithm.
20  * Incept:    SRE, Tue 26 Jun 2018
21  *
22  * Purpose:   Find a maximum match for a bipartite graph. For two sets,
23  *            one with <M> elements and the other with <N>, the <M> by
24  *            <N> input adjacency matrix <A> defines $A_{ij} =$TRUE
25  *            for allowed matches between the two sets, otherwise
26  *            FALSE. The algorithm defines a maximal matching
27  *            bipartite graph <G> with a subset of those edges.
28  *
29  *            The total number of edges in <G> is returned in
30  *            <*ret_edges>. By definition, it is $\leq \min(M,N)$;
31  *            equality means a "perfect" match (especially for the
32  *            case $M=N$).
33  *
34  *            Optionally, the caller can also obtain <G> itself, by
35  *            passing a non-NULL <opt_G> ptr. Edges in <G> are defined
36  *            by $G_{ij} =$ TRUE or FALSE.
37  *
38  * Args:      A          : input adjacency matrix. A[i=0..M-1][j=0..N-1]
39  *            M          : number of elements in 1st set (rows in <A>)
40  *            N          : number of elements in 2nd set (cols in <A>)
41  *            opt_G      : optRETURN: maximal matching bipartite graph
42  *            ret_nedges : RETURN: number of edges in <G>
43  *
44  * Returns:   <eslOK> on success, *ret_edges is the number of edges in <G>,
45  *            and *opt_G (if <&G> was passed) is a ptr to <G>.
46  *
47  * Throws:   <eslEMEM> on allocation failure. Now <*ret_nedges> is 0 and
48  *           <*opt_G>, if it was requested, is NULL.
49  *
50  * Notes:    This is a simplified, specialized version of the
51  *           Ford-Fulkerson maximum flow algorithm. $A_{ij}$ is
52  *           treated as the capacity of directed edges $i \rightarrow
53  *           j$, and the graph is augmented with a source and a sink
54  *           vertex; source $\rightarrow i$ for all $i$, sink
55  *           $\rightarrow j$ for all j, with implicit capacity of 1 on
56  *           all these entry/exit edges.
57  */
58 int
esl_graph_MaxBipartiteMatch(int ** A,int M,int N,int *** opt_G,int * ret_nedges)59 esl_graph_MaxBipartiteMatch(int **A, int M, int N, int ***opt_G, int *ret_nedges)
60 {
61   int **G       = NULL;     // bipartite graph we're building, as a flow network. Gij = 1|0; 1 means i-j link.
62   int  *Ga      = NULL;     //   ... augmented with source -> i flow; Ga[0..M-1] = 1|0.
63   int  *Gz      = NULL;     //   ... and with j -> sink flow;   Gz[0..N-1] = 1|0.
64   int  *parent1 = NULL;     // Parent in path for vertex in 1st set: parent1[i=0..M-1] = 0..N-1 (forward edges only); -1 (no edge yet)
65   int  *parent2 = NULL;     // Parent in path for vertex in 2nd set: parent2[j=0..N-1] = 0..M-1 (reverse edges); M (forward edge to sink); -1 (no edge yet)
66   int   par0;               // Parent for source in a new path
67   int   found_path;         // TRUE when we find a path that can increase flow
68   int   done;               // TRUE while breadth first search is still extending at least one path
69   int   nedges  = 0;        // number of edges in G
70   int   i,j;
71   int   status;
72 
73   /* Allocations. */
74   if (( G = esl_mat_ICreate(M, N) ) == NULL) { status = eslEMEM; goto ERROR; }
75   ESL_ALLOC(Ga, sizeof(int) * M);
76   ESL_ALLOC(Gz, sizeof(int) * N);
77   ESL_ALLOC(parent1, sizeof(int) * M);
78   ESL_ALLOC(parent2, sizeof(int) * N);
79 
80   /* G is initialized with no edges. */
81   esl_vec_ISet(Ga, M, 0);
82   esl_vec_ISet(Gz, N, 0);
83   esl_mat_ISet(G,  M, N, 0);
84 
85   // Given the current G: can we identify a path that increases the overall flow?
86   while (1)
87     {
88       found_path = FALSE;
89       esl_vec_ISet(parent1, M, -1);
90       for (j = 0; j < N; j++) parent2[j] = Gz[j] ? -1 : M;  // j->sink possible if the edge isn't used in G yet; it automatically has capacity of 1.
91 
92       /* Breadth first search (Edmonds/Karp) to find an augmenting path, until there isn't one */
93       do {
94 	done = TRUE; // until proven otherwise
95 
96 	for (j = 0; j < N; j++)  	// breadth-first search back to i from all active j's
97 	  if (parent2[j] != -1)
98 	    for (i = 0; i < M; i++)
99 	      if (parent1[i] == -1 && A[i][j] && ! G[i][j]) { parent1[i] = j; done = FALSE; break; }  // can make forward link if 1) capacity and 2) not used in G yet.
100 
101 	for (i = 0; i < M; i++)        	// breadth-first search back to source from all active i's
102 	  if (parent1[i] != -1 && ! Ga[i]) { par0 = i; found_path = TRUE; break; }
103 
104 	for (i = 0; i < M; i++)        	// active i's can also go back a reverse link to j's
105 	  if (parent1[i] != -1)
106 	    for (j = 0; j < N; j++)
107 	      if (parent2[j] == -1 && G[i][j]) { parent2[j] = i; done = FALSE; break; }
108       } while (! found_path && ! done);
109 
110       if (! found_path) break;  // We're done. This is the only way.
111 
112       // Now follow the path. Turn forward links on; turn reverse links off.
113       i     = par0;
114       Ga[i] = 1;
115       while (1)
116 	{
117 	  j = parent1[i]; G[i][j] = 1;  nedges++;     // add a forward edge
118 	  if (parent2[j] == N) { Gz[j] = 1; break; }  // end path
119 	  i = parent2[j]; G[i][j] = 0;  nedges--;     // subtract a reverse edge
120 	}
121     }
122   free(Ga);       free(Gz);
123   free(parent1);  free(parent2);
124   if (opt_G)  *opt_G = G; else esl_mat_IDestroy(G);
125   *ret_nedges = nedges;
126   return eslOK;
127 
128  ERROR:
129   esl_mat_IDestroy(G);
130   free(Ga);      free(Gz);
131   free(parent1); free(parent2);
132   if (opt_G) *opt_G = NULL;
133   *ret_nedges = 0;
134   return status;
135 }
136 
137 
138 /*****************************************************************
139  * 2. Unit tests
140  *****************************************************************/
141 #ifdef eslGRAPH_TESTDRIVE
142 
143 #include "esl_mixdchlet.h"
144 
145 /* utest_perfect()
146  *
147  * Constructs a known <G0> as a perfect bipartite match, shuffled;
148  * then constructs <A> by adding a random number of extra edges to
149  * it. Infer <G> from <A>. The inferred <G> therefore should be
150  * perfect (nedges = n), and in the case of <= 1 extra added edge, <G0
151  * == G>.
152  */
153 static void
utest_perfect(ESL_RANDOMNESS * rng)154 utest_perfect(ESL_RANDOMNESS *rng)
155 {
156   char   msg[]  = "esl_graph utest_perfect failed";
157   int    n      = 1 + esl_rnd_Roll(rng, 20);  // 1..20
158   int    nextra = esl_rnd_Roll(rng, n*n-n);   // 0..N^2-N-1
159   int   *shuf   = NULL;
160   int  **G0     = esl_mat_ICreate(n, n);
161   int  **A      = esl_mat_ICreate(n, n);
162   int  **G      = NULL;
163   int    ntot   = n;                          // number of edges in A
164   int    nedges;                              // number of edges in G
165   int    i,j,e;
166 
167   if ((shuf = malloc(sizeof(int) * n)) == NULL) esl_fatal(msg);
168   for (i = 0; i < n; i++) shuf[i] = i;
169   esl_vec_IShuffle(rng, shuf, n);
170 
171   esl_mat_ISet(G0, n, n, 0);
172   for (i = 0; i < n; i++)
173     G0[i][shuf[i]] = TRUE;
174 
175   esl_mat_ICopy(G0, n, n, A);
176   for (e = 0; e < nextra; e++)
177     {
178       i = esl_rnd_Roll(rng, n);
179       j = esl_rnd_Roll(rng, n);
180       if (! A[i][j]) ntot++;
181       A[i][j] = TRUE;
182     }
183 
184   esl_graph_MaxBipartiteMatch(A, n, n, &G, &nedges);
185   if (nedges != n) esl_fatal(msg);
186   if (ntot <= n+1 && esl_mat_ICompare(G, G0, n, n) != eslOK) esl_fatal(msg);
187 
188   free(shuf);
189   esl_mat_IDestroy(A);
190   esl_mat_IDestroy(G);
191   esl_mat_IDestroy(G0);
192 }
193 #endif // eslGRAPH_TESTDRIVE
194 
195 /*****************************************************************
196  * 3. Test driver
197  *****************************************************************/
198 #ifdef eslGRAPH_TESTDRIVE
199 
200 #include "easel.h"
201 #include "esl_getopts.h"
202 #include "esl_random.h"
203 
204 static ESL_OPTIONS options[] = {
205   /* name  type         default  env   range togs  reqs  incomp  help                      docgrp */
206   {"-h",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage",            0},
207   {"-s",   eslARG_INT,      "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>",  0},
208   { 0,0,0,0,0,0,0,0,0,0},
209 };
210 static char usage[]  = "[-options]";
211 static char banner[] = "test driver for graph module";
212 
213 int
main(int argc,char ** argv)214 main(int argc, char **argv)
215 {
216   ESL_GETOPTS    *go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
217   ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
218 
219   fprintf(stderr, "## %s\n", argv[0]);
220   fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
221 
222   utest_perfect(rng);
223 
224   fprintf(stderr, "#  status = ok\n");
225   esl_randomness_Destroy(rng);
226   esl_getopts_Destroy(go);
227   return 0;
228 }
229 #endif // eslGRAPH_TESTDRIVE
230 
231 
232