1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5 
6 All rights reserved.
7 
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11 
12 /*! @file dgstrf.c
13  * \brief Computes an LU factorization of a general sparse matrix
14  *
15  * <pre>
16  * -- SuperLU routine (version 3.0) --
17  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18  * and Lawrence Berkeley National Lab.
19  * October 15, 2003
20  *
21  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
22  *
23  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
24  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
25  *
26  * Permission is hereby granted to use or copy this program for any
27  * purpose, provided the above notices are retained on all copies.
28  * Permission to modify the code and to distribute modified code is
29  * granted, provided the above notices are retained, and a notice that
30  * the code was modified is included with the above copyright notice.
31  * </pre>
32  */
33 
34 
35 #include "slu_ddefs.h"
36 #include "supermatrix.h"
37 #include "slu_util.h"
38 
39 /*! \brief
40  *
41  * <pre>
42  * Purpose
43  * =======
44  *
45  * DGSTRF computes an LU factorization of a general sparse m-by-n
46  * matrix A using partial pivoting with row interchanges.
47  * The factorization has the form
48  *     Pr * A = L * U
49  * where Pr is a row permutation matrix, L is lower triangular with unit
50  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
51  * triangular (upper trapezoidal if A->nrow < A->ncol).
52  *
53  * See supermatrix.h for the definition of 'SuperMatrix' structure.
54  *
55  * Arguments
56  * =========
57  *
58  * options (input) superlu_options_t*
59  *         The structure defines the input parameters to control
60  *         how the LU decomposition will be performed.
61  *
62  * A        (input) SuperMatrix*
63  *	    Original matrix A, permuted by columns, of dimension
64  *          (A->nrow, A->ncol). The type of A can be:
65  *          Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
66  *
67  * relax    (input) int
68  *          To control degree of relaxing supernodes. If the number
69  *          of nodes (columns) in a subtree of the elimination tree is less
70  *          than relax, this subtree is considered as one supernode,
71  *          regardless of the row structures of those columns.
72  *
73  * panel_size (input) int
74  *          A panel consists of at most panel_size consecutive columns.
75  *
76  * etree    (input) int*, dimension (A->ncol)
77  *          Elimination tree of A'*A.
78  *          Note: etree is a vector of parent pointers for a forest whose
79  *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
80  *          On input, the columns of A should be permuted so that the
81  *          etree is in a certain postorder.
82  *
83  * work     (input/output) void*, size (lwork) (in bytes)
84  *          User-supplied work space and space for the output data structures.
85  *          Not referenced if lwork = 0;
86  *
87  * lwork   (input) int
88  *         Specifies the size of work array in bytes.
89  *         = 0:  allocate space internally by system malloc;
90  *         > 0:  use user-supplied work array of length lwork in bytes,
91  *               returns error if space runs out.
92  *         = -1: the routine guesses the amount of space needed without
93  *               performing the factorization, and returns it in
94  *               *info; no other side effects.
95  *
96  * perm_c   (input) int*, dimension (A->ncol)
97  *	    Column permutation vector, which defines the
98  *          permutation matrix Pc; perm_c[i] = j means column i of A is
99  *          in position j in A*Pc.
100  *          When searching for diagonal, perm_c[*] is applied to the
101  *          row subscripts of A, so that diagonal threshold pivoting
102  *          can find the diagonal of A, rather than that of A*Pc.
103  *
104  * perm_r   (input/output) int*, dimension (A->nrow)
105  *          Row permutation vector which defines the permutation matrix Pr,
106  *          perm_r[i] = j means row i of A is in position j in Pr*A.
107  *          If options->Fact == SamePattern_SameRowPerm, the pivoting routine
108  *             will try to use the input perm_r, unless a certain threshold
109  *             criterion is violated. In that case, perm_r is overwritten by
110  *             a new permutation determined by partial pivoting or diagonal
111  *             threshold pivoting.
112  *          Otherwise, perm_r is output argument;
113  *
114  * L        (output) SuperMatrix*
115  *          The factor L from the factorization Pr*A=L*U; use compressed row
116  *          subscripts storage for supernodes, i.e., L has type:
117  *          Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
118  *
119  * U        (output) SuperMatrix*
120  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
121  *          storage scheme, i.e., U has types: Stype = SLU_NC,
122  *          Dtype = SLU_D, Mtype = SLU_TRU.
123  *
124  * Glu      (input/output) GlobalLU_t *
125  *          If options->Fact == SamePattern_SameRowPerm, it is an input;
126  *              The matrix A will be factorized assuming that a
127  *              factorization of a matrix with the same sparsity pattern
128  *              and similar numerical values was performed prior to this one.
129  *              Therefore, this factorization will reuse both row and column
130  *		scaling factors R and C, both row and column permutation
131  *		vectors perm_r and perm_c, and the L & U data structures
132  *		set up from the previous factorization.
133  *          Otherwise, it is an output.
134  *
135  * stat     (output) SuperLUStat_t*
136  *          Record the statistics on runtime and floating-point operation count.
137  *          See slu_util.h for the definition of 'SuperLUStat_t'.
138  *
139  * info     (output) int*
140  *          = 0: successful exit
141  *          < 0: if info = -i, the i-th argument had an illegal value
142  *          > 0: if info = i, and i is
143  *             <= A->ncol: U(i,i) is exactly zero. The factorization has
144  *                been completed, but the factor U is exactly singular,
145  *                and division by zero will occur if it is used to solve a
146  *                system of equations.
147  *             > A->ncol: number of bytes allocated when memory allocation
148  *                failure occurred, plus A->ncol. If lwork = -1, it is
149  *                the estimated amount of space needed, plus A->ncol.
150  *
151  * ======================================================================
152  *
153  * Local Working Arrays:
154  * ======================
155  *   m = number of rows in the matrix
156  *   n = number of columns in the matrix
157  *
158  *   xprune[0:n-1]: xprune[*] points to locations in subscript
159  *	vector lsub[*]. For column i, xprune[i] denotes the point where
160  *	structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
161  *	to be traversed for symbolic factorization.
162  *
163  *   marker[0:3*m-1]: marker[i] = j means that node i has been
164  *	reached when working on column j.
165  *	Storage: relative to original row subscripts
166  *	NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
167  *	      see dpanel_dfs.c; marker2 is used for inner-factorization,
168  *            see dcolumn_dfs.c.
169  *
170  *   parent[0:m-1]: parent vector used during dfs
171  *      Storage: relative to new row subscripts
172  *
173  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
174  *	unexplored neighbor of i in lsub[*]
175  *
176  *   segrep[0:nseg-1]: contains the list of supernodal representatives
177  *	in topological order of the dfs. A supernode representative is the
178  *	last column of a supernode.
179  *      The maximum size of segrep[] is n.
180  *
181  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
182  *	supernodal representative r, repfnz[r] is the location of the first
183  *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
184  *	indicates the supernode r has been explored.
185  *	NOTE: There are W of them, each used for one column of a panel.
186  *
187  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
188  *      the panel diagonal. These are filled in during dpanel_dfs(), and are
189  *      used later in the inner LU factorization within the panel.
190  *	panel_lsub[]/dense[] pair forms the SPA data structure.
191  *	NOTE: There are W of them.
192  *
193  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
194  *	    	   NOTE: there are W of them.
195  *
196  *   tempv[0:*]: real temporary used for dense numeric kernels;
197  *	The size of this array is defined by NUM_TEMPV() in slu_ddefs.h.
198  * </pre>
199  */
200 
201 void
dgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)202 dgstrf (superlu_options_t *options, SuperMatrix *A,
203         int relax, int panel_size, int *etree, void *work, int lwork,
204         int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
205     	GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
206         SuperLUStat_t *stat, int *info)
207 {
208     /* Local working arrays */
209     NCPformat *Astore;
210     int       *iperm_r = NULL; /* inverse of perm_r; used when
211                                   options->Fact == SamePattern_SameRowPerm */
212     int       *iperm_c; /* inverse of perm_c */
213     int       *iwork;
214     double    *dwork;
215     int	      *segrep, *repfnz, *parent, *xplore;
216     int	      *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
217     int	      *xprune;
218     int	      *marker;
219     double    *dense, *tempv;
220     int       *relax_end;
221     double    *a;
222     int       *asub;
223     int       *xa_begin, *xa_end;
224     int       *xsup, *supno;
225     int       *xlsub, *xlusup, *xusub;
226     int       nzlumax;
227     double fill_ratio = sp_ienv(6);  /* estimated fill ratio */
228 
229     /* Local scalars */
230     fact_t    fact = options->Fact;
231     double    diag_pivot_thresh = options->DiagPivotThresh;
232     int       pivrow;   /* pivotal row number in the original matrix A */
233     int       nseg1;	/* no of segments in U-column above panel row jcol */
234     int       nseg;	/* no of segments in each U-column */
235     register int jcol;
236     register int kcol;	/* end column of a relaxed snode */
237     register int icol;
238     register int i, k, jj, new_next, iinfo;
239     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
240     int       w_def;	/* upper bound on panel width */
241     int       usepr, iperm_r_allocated = 0;
242     int       nnzL, nnzU;
243     int       *panel_histo = stat->panel_histo;
244     flops_t   *ops = stat->ops;
245 
246     iinfo    = 0;
247     m        = A->nrow;
248     n        = A->ncol;
249     min_mn   = SUPERLU_MIN(m, n);
250     Astore   = A->Store;
251     a        = Astore->nzval;
252     asub     = Astore->rowind;
253     xa_begin = Astore->colbeg;
254     xa_end   = Astore->colend;
255 
256     /* Allocate storage common to the factor routines */
257     *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz,
258                        panel_size, fill_ratio, L, U, Glu, &iwork, &dwork);
259     if ( *info ) return;
260 
261     xsup    = Glu->xsup;
262     supno   = Glu->supno;
263     xlsub   = Glu->xlsub;
264     xlusup  = Glu->xlusup;
265     xusub   = Glu->xusub;
266 
267     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
268 	     &repfnz, &panel_lsub, &xprune, &marker);
269     dSetRWork(m, panel_size, dwork, &dense, &tempv);
270 
271     usepr = (fact == SamePattern_SameRowPerm);
272     if ( usepr ) {
273 	/* Compute the inverse of perm_r */
274 	iperm_r = (int *) intMalloc(m);
275 	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
276 	iperm_r_allocated = 1;
277     }
278     iperm_c = (int *) intMalloc(n);
279     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
280 
281     /* Identify relaxed snodes */
282     relax_end = (int *) intMalloc(n);
283     if ( options->SymmetricMode == YES ) {
284         heap_relax_snode(n, etree, relax, marker, relax_end);
285     } else {
286         relax_snode(n, etree, relax, marker, relax_end);
287     }
288 
289     ifill (perm_r, m, EMPTY);
290     ifill (marker, m * NO_MARKER, EMPTY);
291     supno[0] = -1;
292     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
293     w_def    = panel_size;
294 
295     /*
296      * Work on one "panel" at a time. A panel is one of the following:
297      *	   (a) a relaxed supernode at the bottom of the etree, or
298      *	   (b) panel_size contiguous columns, defined by the user
299      */
300     for (jcol = 0; jcol < min_mn; ) {
301 
302 	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
303    	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
304 	    panel_histo[kcol-jcol+1]++;
305 
306 	    /* --------------------------------------
307 	     * Factorize the relaxed supernode(jcol:kcol)
308 	     * -------------------------------------- */
309 	    /* Determine the union of the row structure of the snode */
310 	    if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
311 				    xprune, marker, Glu)) != 0 )
312 		return;
313 
314             nextu    = xusub[jcol];
315 	    nextlu   = xlusup[jcol];
316 	    jsupno   = supno[jcol];
317 	    fsupc    = xsup[jsupno];
318 	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
319 	    nzlumax = Glu->nzlumax;
320 	    while ( new_next > nzlumax ) {
321 		if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)) )
322 		    return;
323 	    }
324 
325 	    for (icol = jcol; icol<= kcol; icol++) {
326 		xusub[icol+1] = nextu;
327 
328     		/* Scatter into SPA dense[*] */
329     		for (k = xa_begin[icol]; k < xa_end[icol]; k++)
330         	    dense[asub[k]] = a[k];
331 
332 	       	/* Numeric update within the snode */
333 	        dsnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
334 
335 		if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
336 				      iperm_r, iperm_c, &pivrow, Glu, stat)) )
337 		    if ( iinfo == 0 ) iinfo = *info;
338 
339 #ifdef DEBUG
340 		dprint_lu_col("[1]: ", icol, pivrow, xprune, Glu);
341 #endif
342 
343 	    }
344 
345 	    jcol = icol;
346 
347 	} else { /* Work on one panel of panel_size columns */
348 
349 	    /* Adjust panel_size so that a panel won't overlap with the next
350 	     * relaxed snode.
351 	     */
352 	    panel_size = w_def;
353 	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
354 		if ( relax_end[k] != EMPTY ) {
355 		    panel_size = k - jcol;
356 		    break;
357 		}
358 	    if ( k == min_mn ) panel_size = min_mn - jcol;
359 	    panel_histo[panel_size]++;
360 
361 	    /* symbolic factor on a panel of columns */
362 	    dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
363 		      dense, panel_lsub, segrep, repfnz, xprune,
364 		      marker, parent, xplore, Glu);
365 
366 	    /* numeric sup-panel updates in topological order */
367 	    dpanel_bmod(m, panel_size, jcol, nseg1, dense,
368 		        tempv, segrep, repfnz, Glu, stat);
369 
370 	    /* Sparse LU within the panel, and below panel diagonal */
371     	    for ( jj = jcol; jj < jcol + panel_size; jj++) {
372  		k = (jj - jcol) * m; /* column index for w-wide arrays */
373 
374 		nseg = nseg1;	/* Begin after all the panel segments */
375 
376 	    	if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
377 					segrep, &repfnz[k], xprune, marker,
378 					parent, xplore, Glu)) != 0) return;
379 
380 	      	/* Numeric updates */
381 	    	if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
382 					 tempv, &segrep[nseg1], &repfnz[k],
383 					 jcol, Glu, stat)) != 0) return;
384 
385 	        /* Copy the U-segments to ucol[*] */
386 		if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
387 					  perm_r, &dense[k], Glu)) != 0)
388 		    return;
389 
390 	    	if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
391 				      iperm_r, iperm_c, &pivrow, Glu, stat)) )
392 		    if ( iinfo == 0 ) iinfo = *info;
393 
394 		/* Prune columns (0:jj-1) using column jj */
395 	    	dpruneL(jj, perm_r, pivrow, nseg, segrep,
396                         &repfnz[k], xprune, Glu);
397 
398 		/* Reset repfnz[] for this column */
399 	    	resetrep_col (nseg, segrep, &repfnz[k]);
400 
401 #ifdef DEBUG
402 		dprint_lu_col("[2]: ", jj, pivrow, xprune, Glu);
403 #endif
404 
405 	    }
406 
407    	    jcol += panel_size;	/* Move to the next panel */
408 
409 	} /* else */
410 
411     } /* for */
412 
413     *info = iinfo;
414 
415     if ( m > n ) {
416 	k = 0;
417         for (i = 0; i < m; ++i)
418             if ( perm_r[i] == EMPTY ) {
419     		perm_r[i] = n + k;
420 		++k;
421 	    }
422     }
423 
424     countnz(min_mn, xprune, &nnzL, &nnzU, Glu);
425     fixupL(min_mn, perm_r, Glu);
426 
427     dLUWorkFree(iwork, dwork, Glu); /* Free work space and compress storage */
428 
429     if ( fact == SamePattern_SameRowPerm ) {
430         /* L and U structures may have changed due to possibly different
431 	   pivoting, even though the storage is available.
432 	   There could also be memory expansions, so the array locations
433            may have changed, */
434         ((SCformat *)L->Store)->nnz = nnzL;
435 	((SCformat *)L->Store)->nsuper = Glu->supno[n];
436 	((SCformat *)L->Store)->nzval = (double *) Glu->lusup;
437 	((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
438 	((SCformat *)L->Store)->rowind = Glu->lsub;
439 	((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
440 	((NCformat *)U->Store)->nnz = nnzU;
441 	((NCformat *)U->Store)->nzval = (double *) Glu->ucol;
442 	((NCformat *)U->Store)->rowind = Glu->usub;
443 	((NCformat *)U->Store)->colptr = Glu->xusub;
444     } else {
445         dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
446 	      (double *) Glu->lusup, Glu->xlusup,
447               Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
448               SLU_SC, SLU_D, SLU_TRLU);
449     	dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
450 	      (double *) Glu->ucol, Glu->usub, Glu->xusub,
451               SLU_NC, SLU_D, SLU_TRU);
452     }
453 
454     ops[FACT] += ops[TRSV] + ops[GEMV];
455     stat->expansions = --(Glu->num_expansions);
456 
457     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
458     SUPERLU_FREE (iperm_c);
459     SUPERLU_FREE (relax_end);
460 
461 }
462