1 /* ========================================================================== */
2 /* === BTF_MAXTRANS ========================================================= */
3 /* ========================================================================== */
4
5 /* Finds a column permutation that maximizes the number of entries on the
6 * diagonal of a sparse matrix. See btf.h for more information.
7 *
8 * This function is identical to cs_maxtrans in CSparse, with the following
9 * exceptions:
10 *
11 * (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and
12 * imatch [j] = i if row i is matched to column j. This function returns
13 * just jmatch (the Match array). The MATLAB interface to cs_maxtrans
14 * (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB
15 * caller.
16 *
17 * (2) cs_maxtrans includes a pre-pass that counts the number of non-empty
18 * rows and columns (m2 and n2, respectively), and computes the matching
19 * using the transpose of A if m2 < n2. cs_maxtrans also returns quickly
20 * if the diagonal of the matrix is already zero-free. This pre-pass
21 * allows cs_maxtrans to be much faster than maxtrans, if the use of the
22 * transpose is warranted.
23 *
24 * However, for square structurally non-singular matrices with one or more
25 * zeros on the diagonal, the pre-pass is a waste of time, and for these
26 * matrices, maxtrans can be twice as fast as cs_maxtrans. Since the
27 * maxtrans function is intended primarily for square matrices that are
28 * typically structurally nonsingular, the pre-pass is not included here.
29 * If this maxtrans function is used on a matrix with many more columns
30 * than rows, consider passing the transpose to this function, or use
31 * cs_maxtrans instead.
32 *
33 * (3) cs_maxtrans can operate as a randomized algorithm, to help avoid
34 * rare cases of excessive run-time.
35 *
36 * (4) this maxtrans function includes an option that limits the total work
37 * performed. If this limit is reached, the maximum transveral might not
38 * be found.
39 *
40 * Thus, for general usage, cs_maxtrans is preferred. For square matrices that
41 * are typically structurally non-singular, maxtrans is preferred. A partial
42 * maxtrans can still be very useful when solving a sparse linear system.
43 *
44 * By Tim Davis. Copyright (c) 2004-2007, University of Florida.
45 * with support from Sandia National Laboratories. All Rights Reserved.
46 */
47
48 #include "btf.h"
49 #include "btf_internal.h"
50
51
52 /* ========================================================================== */
53 /* === augment ============================================================== */
54 /* ========================================================================== */
55
56 /* Perform a depth-first-search starting at column k, to find an augmenting
57 * path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1),
58 * (i3,j2), ..., (i(s+1), js), such that all of the following properties hold:
59 *
60 * * column k is not matched to any row
61 * * entries in the path are nonzero
62 * * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been
63 * previously matched to each other
64 * * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column
65 *
66 * Once this path is found, the matching can be changed to the set of pairs
67 * path. An augmenting path is a sequence of row/column pairs
68 *
69 * (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js)
70 *
71 * Once a row is matched with a column it remains matched with some column, but
72 * not necessarily the column it was first matched with.
73 *
74 * In the worst case, this function can examine every nonzero in A. Since it
75 * is called n times by maxtrans, the total time of maxtrans can be as high as
76 * O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at
77 * most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might
78 * not be found, however.
79 *
80 * This routine is very similar to the dfs routine in klu_kernel.c, in the
81 * KLU sparse LU factorization package. It is essentially identical to the
82 * cs_augment routine in CSparse, and its recursive version (augment function
83 * in cs_maxtransr_mex.c), except that this routine allows for the search to be
84 * terminated early if too much work is being performed.
85 *
86 * The algorithm is based on the paper "On Algorithms for obtaining a maximum
87 * transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
88 * pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
89 * same issue, pp. 387-390. The code here is a new implementation of that
90 * algorithm, with different data structures and control flow. After writing
91 * this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575)
92 * Some of the comparisons are partial because I didn't dig deeply into all of
93 * the details of MC21A/B, such as how the stack is maintained. The following
94 * arguments are essentially identical between this code and MC21A:
95 *
96 * maxtrans MC21A,B
97 * -------- -------
98 * n N identical
99 * k JORD identical
100 * Ap IP column / row pointers
101 * Ai ICN row / column indices
102 * Ap[n] LICN length of index array (# of nonzeros in A)
103 * Match IPERM output column / row permutation
104 * nmatch NUMNZ # of nonzeros on diagonal of permuted matrix
105 * Flag CV mark a node as visited by the depth-first-search
106 *
107 * The following are different, but analogous:
108 *
109 * Cheap ARP indicates what part of the a column / row has
110 * already been matched.
111 *
112 * The following arguments are very different:
113 *
114 * - LENR # of entries in each row/column (unused in maxtrans)
115 * Pstack OUT Pstack keeps track of where we are in the depth-
116 * first-search scan of column j. I think that OUT
117 * plays a similar role in MC21B, but I'm unsure.
118 * Istack PR keeps track of the rows in the path. PR is a link
119 * list, though, whereas Istack is a stack. Maxtrans
120 * does not use any link lists.
121 * Jstack OUT? PR? the stack for nodes in the path (unsure)
122 *
123 * The following control structures are roughly comparable:
124 *
125 * maxtrans MC21B
126 * -------- -----
127 * for (k = 0 ; k < n ; k++) DO 100 JORD=1,N
128 * while (head >= 0) DO 70 K=1,JORD
129 * for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2
130 * for (p = head ; ...) DO 90 K=1,JORD
131 */
132
augment(Int k,Int Ap[],Int Ai[],Int Match[],Int Cheap[],Int Flag[],Int Istack[],Int Jstack[],Int Pstack[],double * work,double maxwork)133 static Int augment
134 (
135 Int k, /* which stage of the main loop we're in */
136 Int Ap [ ], /* column pointers, size n+1 */
137 Int Ai [ ], /* row indices, size nz = Ap [n] */
138 Int Match [ ], /* size n, Match [i] = j if col j matched to i */
139 Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */
140 Int Flag [ ], /* Flag [j] = k if j already visited this stage */
141 Int Istack [ ], /* size n. Row index stack. */
142 Int Jstack [ ], /* size n. Column index stack. */
143 Int Pstack [ ], /* size n. Keeps track of position in adjacency list */
144 double *work, /* work performed by the depth-first-search */
145 double maxwork /* maximum work allowed */
146 )
147 {
148 /* local variables, but "global" to all DFS levels: */
149 Int found ; /* true if match found. */
150 Int head ; /* top of stack */
151
152 /* variables that are purely local to any one DFS level: */
153 Int j2 ; /* the next DFS goes to node j2 */
154 Int pend ; /* one past the end of the adjacency list for node j */
155 Int pstart ;
156 Int quick ;
157
158 /* variables that need to be pushed then popped from the stack: */
159 Int i ; /* the row tentatively matched to i if DFS successful */
160 Int j ; /* the DFS is at the current node j */
161 Int p ; /* current index into the adj. list for node j */
162 /* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */
163
164 quick = (maxwork > 0) ;
165
166 /* start a DFS to find a match for column k */
167 found = FALSE ;
168 i = EMPTY ;
169 head = 0 ;
170 Jstack [0] = k ;
171 ASSERT (Flag [k] != k) ;
172
173 while (head >= 0)
174 {
175 j = Jstack [head] ;
176 pend = Ap [j+1] ;
177
178 if (Flag [j] != k) /* a node is not yet visited */
179 {
180
181 /* -------------------------------------------------------------- */
182 /* prework for node j */
183 /* -------------------------------------------------------------- */
184
185 /* first time that j has been visited */
186 Flag [j] = k ;
187 /* cheap assignment: find the next unmatched row in col j. This
188 * loop takes at most O(nnz(A)) time for the sum total of all
189 * calls to augment. */
190 for (p = Cheap [j] ; p < pend && !found ; p++)
191 {
192 i = Ai [p] ;
193 found = (Match [i] == EMPTY) ;
194 }
195 Cheap [j] = p ;
196
197 /* -------------------------------------------------------------- */
198
199 /* prepare for DFS */
200 if (found)
201 {
202 /* end of augmenting path, column j matched with row i */
203 Istack [head] = i ;
204 break ;
205 }
206 /* set Pstack [head] to the first entry in column j to scan */
207 Pstack [head] = Ap [j] ;
208 }
209
210 /* ------------------------------------------------------------------ */
211 /* quick return if too much work done */
212 /* ------------------------------------------------------------------ */
213
214 if (quick && *work > maxwork)
215 {
216 /* too much work has been performed; abort the search */
217 return (EMPTY) ;
218 }
219
220 /* ------------------------------------------------------------------ */
221 /* DFS for nodes adjacent to j */
222 /* ------------------------------------------------------------------ */
223
224 /* If cheap assignment not made, continue the depth-first search. All
225 * rows in column j are already matched. Add the adjacent nodes to the
226 * stack by iterating through until finding another non-visited node.
227 *
228 * It is the following loop that can force maxtrans to take
229 * O(n*nnz(A)) time. */
230
231 pstart = Pstack [head] ;
232 for (p = pstart ; p < pend ; p++)
233 {
234 i = Ai [p] ;
235 j2 = Match [i] ;
236 ASSERT (j2 != EMPTY) ;
237 if (Flag [j2] != k)
238 {
239 /* Node j2 is not yet visited, start a depth-first search on
240 * node j2. Keep track of where we left off in the scan of adj
241 * list of node j so we can restart j where we left off. */
242 Pstack [head] = p + 1 ;
243 /* Push j2 onto the stack and immediately break so we can
244 * recurse on node j2. Also keep track of row i which (if this
245 * search for an augmenting path works) will be matched with the
246 * current node j. */
247 Istack [head] = i ;
248 Jstack [++head] = j2 ;
249 break ;
250 }
251 }
252
253 /* ------------------------------------------------------------------ */
254 /* determine how much work was just performed */
255 /* ------------------------------------------------------------------ */
256
257 *work += (p - pstart + 1) ;
258
259 /* ------------------------------------------------------------------ */
260 /* node j is done, but the postwork is postponed - see below */
261 /* ------------------------------------------------------------------ */
262
263 if (p == pend)
264 {
265 /* If all adjacent nodes of j are already visited, pop j from
266 * stack and continue. We failed to find a match. */
267 head-- ;
268 }
269 }
270
271 /* postwork for all nodes j in the stack */
272 /* unwind the path and make the corresponding matches */
273 if (found)
274 {
275 for (p = head ; p >= 0 ; p--)
276 {
277 j = Jstack [p] ;
278 i = Istack [p] ;
279
280 /* -------------------------------------------------------------- */
281 /* postwork for node j */
282 /* -------------------------------------------------------------- */
283 /* if found, match row i with column j */
284 Match [i] = j ;
285 }
286 }
287 return (found) ;
288 }
289
290
291 /* ========================================================================== */
292 /* === maxtrans ============================================================= */
293 /* ========================================================================== */
294
BTF(maxtrans)295 Int BTF(maxtrans) /* returns # of columns in the matching */
296 (
297 /* --- input --- */
298 Int nrow, /* A is nrow-by-ncol in compressed column form */
299 Int ncol,
300 Int Ap [ ], /* size ncol+1 */
301 Int Ai [ ], /* size nz = Ap [ncol] */
302 double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This
303 * work limit excludes the O(nnz(A)) cheap-match phase. */
304
305 /* --- output --- */
306 double *work, /* work = -1 if maxwork > 0 and the total work performed
307 * reached the maximum of maxwork*nnz(A)).
308 * Otherwise, work = the total work performed. */
309
310 Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */
311
312 /* --- workspace --- */
313 Int Work [ ] /* size 5*ncol */
314 )
315 {
316 Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ;
317 Int i, j, k, nmatch, work_limit_reached, result ;
318
319 /* ---------------------------------------------------------------------- */
320 /* get workspace and initialize */
321 /* ---------------------------------------------------------------------- */
322
323 Cheap = Work ; Work += ncol ;
324 Flag = Work ; Work += ncol ;
325
326 /* stack for non-recursive depth-first search in augment function */
327 Istack = Work ; Work += ncol ;
328 Jstack = Work ; Work += ncol ;
329 Pstack = Work ;
330
331 /* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */
332 for (j = 0 ; j < ncol ; j++)
333 {
334 Cheap [j] = Ap [j] ;
335 Flag [j] = EMPTY ;
336 }
337
338 /* all rows and columns are currently unmatched */
339 for (i = 0 ; i < nrow ; i++)
340 {
341 Match [i] = EMPTY ;
342 }
343
344 if (maxwork > 0)
345 {
346 maxwork *= Ap [ncol] ;
347 }
348 *work = 0 ;
349
350 /* ---------------------------------------------------------------------- */
351 /* find a matching row for each column k */
352 /* ---------------------------------------------------------------------- */
353
354 nmatch = 0 ;
355 work_limit_reached = FALSE ;
356 for (k = 0 ; k < ncol ; k++)
357 {
358 /* find an augmenting path to match some row i to column k */
359 result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack,
360 work, maxwork) ;
361 if (result == TRUE)
362 {
363 /* we found it. Match [i] = k for some row i has been done. */
364 nmatch++ ;
365 }
366 else if (result == EMPTY)
367 {
368 /* augment gave up because of too much work, and no match found */
369 work_limit_reached = TRUE ;
370 }
371 }
372
373 /* ---------------------------------------------------------------------- */
374 /* return the Match, and the # of matches made */
375 /* ---------------------------------------------------------------------- */
376
377 /* At this point, row i is matched to j = Match [i] if j >= 0. i is an
378 * unmatched row if Match [i] == EMPTY. */
379
380 if (work_limit_reached)
381 {
382 /* return -1 if the work limit of maxwork*nnz(A) was reached */
383 *work = EMPTY ;
384 }
385
386 return (nmatch) ;
387 }
388