1 /* ========================================================================== */
2 /* === BTF_MAXTRANS ========================================================= */
3 /* ========================================================================== */
4 
5 /* Finds a column permutation that maximizes the number of entries on the
6  * diagonal of a sparse matrix.  See btf.h for more information.
7  *
8  * This function is identical to cs_maxtrans in CSparse, with the following
9  * exceptions:
10  *
11  *  (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and
12  *      imatch [j] = i if row i is matched to column j.  This function returns
13  *      just jmatch (the Match array).  The MATLAB interface to cs_maxtrans
14  *      (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB
15  *      caller.
16  *
17  *  (2) cs_maxtrans includes a pre-pass that counts the number of non-empty
18  *      rows and columns (m2 and n2, respectively), and computes the matching
19  *      using the transpose of A if m2 < n2.  cs_maxtrans also returns quickly
20  *      if the diagonal of the matrix is already zero-free.  This pre-pass
21  *      allows cs_maxtrans to be much faster than maxtrans, if the use of the
22  *      transpose is warranted.
23  *
24  *      However, for square structurally non-singular matrices with one or more
25  *      zeros on the diagonal, the pre-pass is a waste of time, and for these
26  *      matrices, maxtrans can be twice as fast as cs_maxtrans.  Since the
27  *      maxtrans function is intended primarily for square matrices that are
28  *      typically structurally nonsingular, the pre-pass is not included here.
29  *      If this maxtrans function is used on a matrix with many more columns
30  *      than rows, consider passing the transpose to this function, or use
31  *      cs_maxtrans instead.
32  *
33  *  (3) cs_maxtrans can operate as a randomized algorithm, to help avoid
34  *      rare cases of excessive run-time.
35  *
36  *  (4) this maxtrans function includes an option that limits the total work
37  *      performed.  If this limit is reached, the maximum transveral might not
38  *      be found.
39  *
40  * Thus, for general usage, cs_maxtrans is preferred.  For square matrices that
41  * are typically structurally non-singular, maxtrans is preferred.  A partial
42  * maxtrans can still be very useful when solving a sparse linear system.
43  *
44  * By Tim Davis.  Copyright (c) 2004-2007, University of Florida.
45  * with support from Sandia National Laboratories.  All Rights Reserved.
46  */
47 
48 #include "btf.h"
49 #include "btf_internal.h"
50 
51 
52 /* ========================================================================== */
53 /* === augment ============================================================== */
54 /* ========================================================================== */
55 
56 /* Perform a depth-first-search starting at column k, to find an augmenting
57  * path.  An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1),
58  * (i3,j2), ..., (i(s+1), js), such that all of the following properties hold:
59  *
60  *      * column k is not matched to any row
61  *      * entries in the path are nonzero
62  *      * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been
63  *          previously matched to each other
64  *      * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column
65  *
66  * Once this path is found, the matching can be changed to the set of pairs
67  * path.  An augmenting path is a sequence of row/column pairs
68  *
69  *      (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js)
70  *
71  * Once a row is matched with a column it remains matched with some column, but
72  * not necessarily the column it was first matched with.
73  *
74  * In the worst case, this function can examine every nonzero in A.  Since it
75  * is called n times by maxtrans, the total time of maxtrans can be as high as
76  * O(n*nnz(A)).  To limit this work, pass a value of maxwork > 0.  Then at
77  * most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might
78  * not be found, however.
79  *
80  * This routine is very similar to the dfs routine in klu_kernel.c, in the
81  * KLU sparse LU factorization package.  It is essentially identical to the
82  * cs_augment routine in CSparse, and its recursive version (augment function
83  * in cs_maxtransr_mex.c), except that this routine allows for the search to be
84  * terminated early if too much work is being performed.
85  *
86  * The algorithm is based on the paper "On Algorithms for obtaining a maximum
87  * transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
88  * pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
89  * same issue, pp. 387-390.  The code here is a new implementation of that
90  * algorithm, with different data structures and control flow.  After writing
91  * this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575)
92  * Some of the comparisons are partial because I didn't dig deeply into all of
93  * the details of MC21A/B, such as how the stack is maintained.  The following
94  * arguments are essentially identical between this code and MC21A:
95  *
96  * maxtrans     MC21A,B
97  * --------     -------
98  * n            N           identical
99  * k            JORD        identical
100  * Ap           IP          column / row pointers
101  * Ai           ICN         row / column indices
102  * Ap[n]        LICN        length of index array (# of nonzeros in A)
103  * Match        IPERM       output column / row permutation
104  * nmatch       NUMNZ       # of nonzeros on diagonal of permuted matrix
105  * Flag         CV          mark a node as visited by the depth-first-search
106  *
107  * The following are different, but analogous:
108  *
109  * Cheap        ARP         indicates what part of the a column / row has
110  *                          already been matched.
111  *
112  * The following arguments are very different:
113  *
114  * -            LENR        # of entries in each row/column (unused in maxtrans)
115  * Pstack       OUT         Pstack keeps track of where we are in the depth-
116  *                          first-search scan of column j.  I think that OUT
117  *                          plays a similar role in MC21B, but I'm unsure.
118  * Istack       PR          keeps track of the rows in the path.  PR is a link
119  *                          list, though, whereas Istack is a stack.  Maxtrans
120  *                          does not use any link lists.
121  * Jstack       OUT? PR?    the stack for nodes in the path (unsure)
122  *
123  * The following control structures are roughly comparable:
124  *
125  * maxtrans                     MC21B
126  * --------                     -----
127  * for (k = 0 ; k < n ; k++)    DO 100 JORD=1,N
128  * while (head >= 0)            DO 70 K=1,JORD
129  * for (p = Cheap [j] ; ...)    DO 20 II=IN1,IN2
130  * for (p = head ; ...)         DO 90 K=1,JORD
131  */
132 
augment(Int k,Int Ap[],Int Ai[],Int Match[],Int Cheap[],Int Flag[],Int Istack[],Int Jstack[],Int Pstack[],double * work,double maxwork)133 static Int augment
134 (
135     Int k,              /* which stage of the main loop we're in */
136     Int Ap [ ],         /* column pointers, size n+1 */
137     Int Ai [ ],         /* row indices, size nz = Ap [n] */
138     Int Match [ ],      /* size n,  Match [i] = j if col j matched to i */
139     Int Cheap [ ],      /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */
140     Int Flag [ ],       /* Flag [j] = k if j already visited this stage */
141     Int Istack [ ],     /* size n.  Row index stack. */
142     Int Jstack [ ],     /* size n.  Column index stack. */
143     Int Pstack [ ],     /* size n.  Keeps track of position in adjacency list */
144     double *work,       /* work performed by the depth-first-search */
145     double maxwork      /* maximum work allowed */
146 )
147 {
148     /* local variables, but "global" to all DFS levels: */
149     Int found ; /* true if match found.  */
150     Int head ;  /* top of stack */
151 
152     /* variables that are purely local to any one DFS level: */
153     Int j2 ;    /* the next DFS goes to node j2 */
154     Int pend ;  /* one past the end of the adjacency list for node j */
155     Int pstart ;
156     Int quick ;
157 
158     /* variables that need to be pushed then popped from the stack: */
159     Int i ;     /* the row tentatively matched to i if DFS successful */
160     Int j ;     /* the DFS is at the current node j */
161     Int p ;     /* current index into the adj. list for node j */
162     /* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */
163 
164     quick = (maxwork > 0) ;
165 
166     /* start a DFS to find a match for column k */
167     found = FALSE ;
168     i = EMPTY ;
169     head = 0 ;
170     Jstack [0] = k ;
171     ASSERT (Flag [k] != k) ;
172 
173     while (head >= 0)
174     {
175         j = Jstack [head] ;
176         pend = Ap [j+1] ;
177 
178         if (Flag [j] != k)          /* a node is not yet visited */
179         {
180 
181             /* -------------------------------------------------------------- */
182             /* prework for node j */
183             /* -------------------------------------------------------------- */
184 
185             /* first time that j has been visited */
186             Flag [j] = k ;
187             /* cheap assignment: find the next unmatched row in col j.  This
188              * loop takes at most O(nnz(A)) time for the sum total of all
189              * calls to augment. */
190             for (p = Cheap [j] ; p < pend && !found ; p++)
191             {
192                 i = Ai [p] ;
193                 found = (Match [i] == EMPTY) ;
194             }
195             Cheap [j] = p ;
196 
197             /* -------------------------------------------------------------- */
198 
199             /* prepare for DFS */
200             if (found)
201             {
202                 /* end of augmenting path, column j matched with row i */
203                 Istack [head] = i ;
204                 break ;
205             }
206             /* set Pstack [head] to the first entry in column j to scan */
207             Pstack [head] = Ap [j] ;
208         }
209 
210         /* ------------------------------------------------------------------ */
211         /* quick return if too much work done */
212         /* ------------------------------------------------------------------ */
213 
214         if (quick && *work > maxwork)
215         {
216             /* too much work has been performed; abort the search */
217             return (EMPTY) ;
218         }
219 
220         /* ------------------------------------------------------------------ */
221         /* DFS for nodes adjacent to j */
222         /* ------------------------------------------------------------------ */
223 
224         /* If cheap assignment not made, continue the depth-first search.  All
225          * rows in column j are already matched.  Add the adjacent nodes to the
226          * stack by iterating through until finding another non-visited node.
227          *
228          * It is the following loop that can force maxtrans to take
229          * O(n*nnz(A)) time. */
230 
231         pstart = Pstack [head] ;
232         for (p = pstart ; p < pend ; p++)
233         {
234             i = Ai [p] ;
235             j2 = Match [i] ;
236             ASSERT (j2 != EMPTY) ;
237             if (Flag [j2] != k)
238             {
239                 /* Node j2 is not yet visited, start a depth-first search on
240                  * node j2.  Keep track of where we left off in the scan of adj
241                  * list of node j so we can restart j where we left off. */
242                 Pstack [head] = p + 1 ;
243                 /* Push j2 onto the stack and immediately break so we can
244                  * recurse on node j2.  Also keep track of row i which (if this
245                  * search for an augmenting path works) will be matched with the
246                  * current node j. */
247                 Istack [head] = i ;
248                 Jstack [++head] = j2 ;
249                 break ;
250             }
251         }
252 
253         /* ------------------------------------------------------------------ */
254         /* determine how much work was just performed */
255         /* ------------------------------------------------------------------ */
256 
257         *work += (p - pstart + 1) ;
258 
259         /* ------------------------------------------------------------------ */
260         /* node j is done, but the postwork is postponed - see below */
261         /* ------------------------------------------------------------------ */
262 
263         if (p == pend)
264         {
265             /* If all adjacent nodes of j are already visited, pop j from
266              * stack and continue.  We failed to find a match. */
267             head-- ;
268         }
269     }
270 
271     /* postwork for all nodes j in the stack */
272     /* unwind the path and make the corresponding matches */
273     if (found)
274     {
275         for (p = head ; p >= 0 ; p--)
276         {
277             j = Jstack [p] ;
278             i = Istack [p] ;
279 
280             /* -------------------------------------------------------------- */
281             /* postwork for node j */
282             /* -------------------------------------------------------------- */
283             /* if found, match row i with column j */
284             Match [i] = j ;
285         }
286     }
287     return (found) ;
288 }
289 
290 
291 /* ========================================================================== */
292 /* === maxtrans ============================================================= */
293 /* ========================================================================== */
294 
BTF(maxtrans)295 Int BTF(maxtrans)   /* returns # of columns in the matching */
296 (
297     /* --- input --- */
298     Int nrow,       /* A is nrow-by-ncol in compressed column form */
299     Int ncol,
300     Int Ap [ ],     /* size ncol+1 */
301     Int Ai [ ],     /* size nz = Ap [ncol] */
302     double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0.  This
303                      * work limit excludes the O(nnz(A)) cheap-match phase. */
304 
305     /* --- output --- */
306     double *work,   /* work = -1 if maxwork > 0 and the total work performed
307                      * reached the maximum of maxwork*nnz(A)).
308                      * Otherwise, work = the total work performed. */
309 
310     Int Match [ ],  /* size nrow.  Match [i] = j if column j matched to row i */
311 
312     /* --- workspace --- */
313     Int Work [ ]    /* size 5*ncol */
314 )
315 {
316     Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ;
317     Int i, j, k, nmatch, work_limit_reached, result ;
318 
319     /* ---------------------------------------------------------------------- */
320     /* get workspace and initialize */
321     /* ---------------------------------------------------------------------- */
322 
323     Cheap  = Work ; Work += ncol ;
324     Flag   = Work ; Work += ncol ;
325 
326     /* stack for non-recursive depth-first search in augment function */
327     Istack = Work ; Work += ncol ;
328     Jstack = Work ; Work += ncol ;
329     Pstack = Work ;
330 
331     /* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */
332     for (j = 0 ; j < ncol ; j++)
333     {
334         Cheap [j] = Ap [j] ;
335         Flag [j] = EMPTY ;
336     }
337 
338     /* all rows and columns are currently unmatched */
339     for (i = 0 ; i < nrow ; i++)
340     {
341         Match [i] = EMPTY ;
342     }
343 
344     if (maxwork > 0)
345     {
346         maxwork *= Ap [ncol] ;
347     }
348     *work = 0 ;
349 
350     /* ---------------------------------------------------------------------- */
351     /* find a matching row for each column k */
352     /* ---------------------------------------------------------------------- */
353 
354     nmatch = 0 ;
355     work_limit_reached = FALSE ;
356     for (k = 0 ; k < ncol ; k++)
357     {
358         /* find an augmenting path to match some row i to column k */
359         result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack,
360             work, maxwork) ;
361         if (result == TRUE)
362         {
363             /* we found it.  Match [i] = k for some row i has been done. */
364             nmatch++ ;
365         }
366         else if (result == EMPTY)
367         {
368             /* augment gave up because of too much work, and no match found */
369             work_limit_reached = TRUE ;
370         }
371     }
372 
373     /* ---------------------------------------------------------------------- */
374     /* return the Match, and the # of matches made */
375     /* ---------------------------------------------------------------------- */
376 
377     /* At this point, row i is matched to j = Match [i] if j >= 0.  i is an
378      * unmatched row if Match [i] == EMPTY. */
379 
380     if (work_limit_reached)
381     {
382         /* return -1 if the work limit of maxwork*nnz(A) was reached */
383         *work = EMPTY ;
384     }
385 
386     return (nmatch) ;
387 }
388