1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5 
6 All rights reserved.
7 
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11 
12 /*! @file cgstrf.c
13  * \brief Computes an LU factorization of a general sparse matrix
14  *
15  * <pre>
16  * -- SuperLU routine (version 3.0) --
17  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18  * and Lawrence Berkeley National Lab.
19  * October 15, 2003
20  *
21  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
22  *
23  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
24  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
25  *
26  * Permission is hereby granted to use or copy this program for any
27  * purpose, provided the above notices are retained on all copies.
28  * Permission to modify the code and to distribute modified code is
29  * granted, provided the above notices are retained, and a notice that
30  * the code was modified is included with the above copyright notice.
31  * </pre>
32  */
33 
34 
35 #include "slu_cdefs.h"
36 
37 /*! \brief
38  *
39  * <pre>
40  * Purpose
41  * =======
42  *
43  * CGSTRF computes an LU factorization of a general sparse m-by-n
44  * matrix A using partial pivoting with row interchanges.
45  * The factorization has the form
46  *     Pr * A = L * U
47  * where Pr is a row permutation matrix, L is lower triangular with unit
48  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
49  * triangular (upper trapezoidal if A->nrow < A->ncol).
50  *
51  * See supermatrix.h for the definition of 'SuperMatrix' structure.
52  *
53  * Arguments
54  * =========
55  *
56  * options (input) superlu_options_t*
57  *         The structure defines the input parameters to control
58  *         how the LU decomposition will be performed.
59  *
60  * A        (input) SuperMatrix*
61  *	    Original matrix A, permuted by columns, of dimension
62  *          (A->nrow, A->ncol). The type of A can be:
63  *          Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
64  *
65  * relax    (input) int
66  *          To control degree of relaxing supernodes. If the number
67  *          of nodes (columns) in a subtree of the elimination tree is less
68  *          than relax, this subtree is considered as one supernode,
69  *          regardless of the row structures of those columns.
70  *
71  * panel_size (input) int
72  *          A panel consists of at most panel_size consecutive columns.
73  *
74  * etree    (input) int*, dimension (A->ncol)
75  *          Elimination tree of A'*A.
76  *          Note: etree is a vector of parent pointers for a forest whose
77  *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
78  *          On input, the columns of A should be permuted so that the
79  *          etree is in a certain postorder.
80  *
81  * work     (input/output) void*, size (lwork) (in bytes)
82  *          User-supplied work space and space for the output data structures.
83  *          Not referenced if lwork = 0;
84  *
85  * lwork   (input) int
86  *         Specifies the size of work array in bytes.
87  *         = 0:  allocate space internally by system malloc;
88  *         > 0:  use user-supplied work array of length lwork in bytes,
89  *               returns error if space runs out.
90  *         = -1: the routine guesses the amount of space needed without
91  *               performing the factorization, and returns it in
92  *               *info; no other side effects.
93  *
94  * perm_c   (input) int*, dimension (A->ncol)
95  *	    Column permutation vector, which defines the
96  *          permutation matrix Pc; perm_c[i] = j means column i of A is
97  *          in position j in A*Pc.
98  *          When searching for diagonal, perm_c[*] is applied to the
99  *          row subscripts of A, so that diagonal threshold pivoting
100  *          can find the diagonal of A, rather than that of A*Pc.
101  *
102  * perm_r   (input/output) int*, dimension (A->nrow)
103  *          Row permutation vector which defines the permutation matrix Pr,
104  *          perm_r[i] = j means row i of A is in position j in Pr*A.
105  *          If options->Fact == SamePattern_SameRowPerm, the pivoting routine
106  *             will try to use the input perm_r, unless a certain threshold
107  *             criterion is violated. In that case, perm_r is overwritten by
108  *             a new permutation determined by partial pivoting or diagonal
109  *             threshold pivoting.
110  *          Otherwise, perm_r is output argument;
111  *
112  * L        (output) SuperMatrix*
113  *          The factor L from the factorization Pr*A=L*U; use compressed row
114  *          subscripts storage for supernodes, i.e., L has type:
115  *          Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
116  *
117  * U        (output) SuperMatrix*
118  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
119  *          storage scheme, i.e., U has types: Stype = SLU_NC,
120  *          Dtype = SLU_C, Mtype = SLU_TRU.
121  *
122  * Glu      (input/output) GlobalLU_t *
123  *          If options->Fact == SamePattern_SameRowPerm, it is an input;
124  *              The matrix A will be factorized assuming that a
125  *              factorization of a matrix with the same sparsity pattern
126  *              and similar numerical values was performed prior to this one.
127  *              Therefore, this factorization will reuse both row and column
128  *		scaling factors R and C, both row and column permutation
129  *		vectors perm_r and perm_c, and the L & U data structures
130  *		set up from the previous factorization.
131  *          Otherwise, it is an output.
132  *
133  * stat     (output) SuperLUStat_t*
134  *          Record the statistics on runtime and floating-point operation count.
135  *          See slu_util.h for the definition of 'SuperLUStat_t'.
136  *
137  * info     (output) int*
138  *          = 0: successful exit
139  *          < 0: if info = -i, the i-th argument had an illegal value
140  *          > 0: if info = i, and i is
141  *             <= A->ncol: U(i,i) is exactly zero. The factorization has
142  *                been completed, but the factor U is exactly singular,
143  *                and division by zero will occur if it is used to solve a
144  *                system of equations.
145  *             > A->ncol: number of bytes allocated when memory allocation
146  *                failure occurred, plus A->ncol. If lwork = -1, it is
147  *                the estimated amount of space needed, plus A->ncol.
148  *
149  * ======================================================================
150  *
151  * Local Working Arrays:
152  * ======================
153  *   m = number of rows in the matrix
154  *   n = number of columns in the matrix
155  *
156  *   xprune[0:n-1]: xprune[*] points to locations in subscript
157  *	vector lsub[*]. For column i, xprune[i] denotes the point where
158  *	structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
159  *	to be traversed for symbolic factorization.
160  *
161  *   marker[0:3*m-1]: marker[i] = j means that node i has been
162  *	reached when working on column j.
163  *	Storage: relative to original row subscripts
164  *	NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
165  *	      see cpanel_dfs.c; marker2 is used for inner-factorization,
166  *            see ccolumn_dfs.c.
167  *
168  *   parent[0:m-1]: parent vector used during dfs
169  *      Storage: relative to new row subscripts
170  *
171  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
172  *	unexplored neighbor of i in lsub[*]
173  *
174  *   segrep[0:nseg-1]: contains the list of supernodal representatives
175  *	in topological order of the dfs. A supernode representative is the
176  *	last column of a supernode.
177  *      The maximum size of segrep[] is n.
178  *
179  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
180  *	supernodal representative r, repfnz[r] is the location of the first
181  *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
182  *	indicates the supernode r has been explored.
183  *	NOTE: There are W of them, each used for one column of a panel.
184  *
185  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
186  *      the panel diagonal. These are filled in during cpanel_dfs(), and are
187  *      used later in the inner LU factorization within the panel.
188  *	panel_lsub[]/dense[] pair forms the SPA data structure.
189  *	NOTE: There are W of them.
190  *
191  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
192  *	    	   NOTE: there are W of them.
193  *
194  *   tempv[0:*]: real temporary used for dense numeric kernels;
195  *	The size of this array is defined by NUM_TEMPV() in slu_cdefs.h.
196  * </pre>
197  */
198 
199 void
cgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)200 cgstrf (superlu_options_t *options, SuperMatrix *A,
201         int relax, int panel_size, int *etree, void *work, int lwork,
202         int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
203     	GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
204         SuperLUStat_t *stat, int *info)
205 {
206     /* Local working arrays */
207     NCPformat *Astore;
208     int       *iperm_r = NULL; /* inverse of perm_r; used when
209                                   options->Fact == SamePattern_SameRowPerm */
210     int       *iperm_c; /* inverse of perm_c */
211     int       *iwork;
212     complex    *cwork;
213     int	      *segrep, *repfnz, *parent, *xplore;
214     int	      *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
215     int	      *xprune;
216     int	      *marker;
217     complex    *dense, *tempv;
218     int       *relax_end;
219     complex    *a;
220     int       *asub;
221     int       *xa_begin, *xa_end;
222     int       *xsup, *supno;
223     int       *xlsub, *xlusup, *xusub;
224     int       nzlumax;
225     float fill_ratio = sp_ienv(6);  /* estimated fill ratio */
226 
227     /* Local scalars */
228     fact_t    fact = options->Fact;
229     double    diag_pivot_thresh = options->DiagPivotThresh;
230     int       pivrow;   /* pivotal row number in the original matrix A */
231     int       nseg1;	/* no of segments in U-column above panel row jcol */
232     int       nseg;	/* no of segments in each U-column */
233     register int jcol;
234     register int kcol;	/* end column of a relaxed snode */
235     register int icol;
236     register int i, k, jj, new_next, iinfo;
237     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
238     int       w_def;	/* upper bound on panel width */
239     int       usepr, iperm_r_allocated = 0;
240     int       nnzL, nnzU;
241     int       *panel_histo = stat->panel_histo;
242     flops_t   *ops = stat->ops;
243 
244     iinfo    = 0;
245     m        = A->nrow;
246     n        = A->ncol;
247     min_mn   = SUPERLU_MIN(m, n);
248     Astore   = A->Store;
249     a        = Astore->nzval;
250     asub     = Astore->rowind;
251     xa_begin = Astore->colbeg;
252     xa_end   = Astore->colend;
253 
254     /* Allocate storage common to the factor routines */
255     *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz,
256                        panel_size, fill_ratio, L, U, Glu, &iwork, &cwork);
257     if ( *info ) return;
258 
259     xsup    = Glu->xsup;
260     supno   = Glu->supno;
261     xlsub   = Glu->xlsub;
262     xlusup  = Glu->xlusup;
263     xusub   = Glu->xusub;
264 
265     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
266 	     &repfnz, &panel_lsub, &xprune, &marker);
267     cSetRWork(m, panel_size, cwork, &dense, &tempv);
268 
269     usepr = (fact == SamePattern_SameRowPerm);
270     if ( usepr ) {
271 	/* Compute the inverse of perm_r */
272 	iperm_r = (int *) intMalloc(m);
273 	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
274 	iperm_r_allocated = 1;
275     }
276     iperm_c = (int *) intMalloc(n);
277     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
278 
279     /* Identify relaxed snodes */
280     relax_end = (int *) intMalloc(n);
281     if ( options->SymmetricMode == YES ) {
282         heap_relax_snode(n, etree, relax, marker, relax_end);
283     } else {
284         relax_snode(n, etree, relax, marker, relax_end);
285     }
286 
287     ifill (perm_r, m, EMPTY);
288     ifill (marker, m * NO_MARKER, EMPTY);
289     supno[0] = -1;
290     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
291     w_def    = panel_size;
292 
293     /*
294      * Work on one "panel" at a time. A panel is one of the following:
295      *	   (a) a relaxed supernode at the bottom of the etree, or
296      *	   (b) panel_size contiguous columns, defined by the user
297      */
298     for (jcol = 0; jcol < min_mn; ) {
299 
300 	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
301    	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
302 	    panel_histo[kcol-jcol+1]++;
303 
304 	    /* --------------------------------------
305 	     * Factorize the relaxed supernode(jcol:kcol)
306 	     * -------------------------------------- */
307 	    /* Determine the union of the row structure of the snode */
308 	    if ( (*info = csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
309 				    xprune, marker, Glu)) != 0 )
310 		return;
311 
312             nextu    = xusub[jcol];
313 	    nextlu   = xlusup[jcol];
314 	    jsupno   = supno[jcol];
315 	    fsupc    = xsup[jsupno];
316 	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
317 	    nzlumax = Glu->nzlumax;
318 	    while ( new_next > nzlumax ) {
319 		if ( (*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)) )
320 		    return;
321 	    }
322 
323 	    for (icol = jcol; icol<= kcol; icol++) {
324 		xusub[icol+1] = nextu;
325 
326     		/* Scatter into SPA dense[*] */
327     		for (k = xa_begin[icol]; k < xa_end[icol]; k++)
328         	    dense[asub[k]] = a[k];
329 
330 	       	/* Numeric update within the snode */
331 	        csnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
332 
333 		if ( (*info = cpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
334 				      iperm_r, iperm_c, &pivrow, Glu, stat)) )
335 		    if ( iinfo == 0 ) iinfo = *info;
336 
337 #ifdef DEBUG
338 		cprint_lu_col("[1]: ", icol, pivrow, xprune, Glu);
339 #endif
340 
341 	    }
342 
343 	    jcol = icol;
344 
345 	} else { /* Work on one panel of panel_size columns */
346 
347 	    /* Adjust panel_size so that a panel won't overlap with the next
348 	     * relaxed snode.
349 	     */
350 	    panel_size = w_def;
351 	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
352 		if ( relax_end[k] != EMPTY ) {
353 		    panel_size = k - jcol;
354 		    break;
355 		}
356 	    if ( k == min_mn ) panel_size = min_mn - jcol;
357 	    panel_histo[panel_size]++;
358 
359 	    /* symbolic factor on a panel of columns */
360 	    cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
361 		      dense, panel_lsub, segrep, repfnz, xprune,
362 		      marker, parent, xplore, Glu);
363 
364 	    /* numeric sup-panel updates in topological order */
365 	    cpanel_bmod(m, panel_size, jcol, nseg1, dense,
366 		        tempv, segrep, repfnz, Glu, stat);
367 
368 	    /* Sparse LU within the panel, and below panel diagonal */
369     	    for ( jj = jcol; jj < jcol + panel_size; jj++) {
370  		k = (jj - jcol) * m; /* column index for w-wide arrays */
371 
372 		nseg = nseg1;	/* Begin after all the panel segments */
373 
374 	    	if ((*info = ccolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
375 					segrep, &repfnz[k], xprune, marker,
376 					parent, xplore, Glu)) != 0) return;
377 
378 	      	/* Numeric updates */
379 	    	if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
380 					 tempv, &segrep[nseg1], &repfnz[k],
381 					 jcol, Glu, stat)) != 0) return;
382 
383 	        /* Copy the U-segments to ucol[*] */
384 		if ((*info = ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
385 					  perm_r, &dense[k], Glu)) != 0)
386 		    return;
387 
388 	    	if ( (*info = cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
389 				      iperm_r, iperm_c, &pivrow, Glu, stat)) )
390 		    if ( iinfo == 0 ) iinfo = *info;
391 
392 		/* Prune columns (0:jj-1) using column jj */
393 	    	cpruneL(jj, perm_r, pivrow, nseg, segrep,
394                         &repfnz[k], xprune, Glu);
395 
396 		/* Reset repfnz[] for this column */
397 	    	resetrep_col (nseg, segrep, &repfnz[k]);
398 
399 #ifdef DEBUG
400 		cprint_lu_col("[2]: ", jj, pivrow, xprune, Glu);
401 #endif
402 
403 	    }
404 
405    	    jcol += panel_size;	/* Move to the next panel */
406 
407 	} /* else */
408 
409     } /* for */
410 
411     *info = iinfo;
412 
413     if ( m > n ) {
414 	k = 0;
415         for (i = 0; i < m; ++i)
416             if ( perm_r[i] == EMPTY ) {
417     		perm_r[i] = n + k;
418 		++k;
419 	    }
420     }
421 
422     countnz(min_mn, xprune, &nnzL, &nnzU, Glu);
423     fixupL(min_mn, perm_r, Glu);
424 
425     cLUWorkFree(iwork, cwork, Glu); /* Free work space and compress storage */
426 
427     if ( fact == SamePattern_SameRowPerm ) {
428         /* L and U structures may have changed due to possibly different
429 	   pivoting, even though the storage is available.
430 	   There could also be memory expansions, so the array locations
431            may have changed, */
432         ((SCformat *)L->Store)->nnz = nnzL;
433 	((SCformat *)L->Store)->nsuper = Glu->supno[n];
434 	((SCformat *)L->Store)->nzval = (complex *) Glu->lusup;
435 	((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
436 	((SCformat *)L->Store)->rowind = Glu->lsub;
437 	((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
438 	((NCformat *)U->Store)->nnz = nnzU;
439 	((NCformat *)U->Store)->nzval = (complex *) Glu->ucol;
440 	((NCformat *)U->Store)->rowind = Glu->usub;
441 	((NCformat *)U->Store)->colptr = Glu->xusub;
442     } else {
443         cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
444 	      (complex *) Glu->lusup, Glu->xlusup,
445               Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
446               SLU_SC, SLU_C, SLU_TRLU);
447     	cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
448 	      (complex *) Glu->ucol, Glu->usub, Glu->xusub,
449               SLU_NC, SLU_C, SLU_TRU);
450     }
451 
452     ops[FACT] += ops[TRSV] + ops[GEMV];
453     stat->expansions = --(Glu->num_expansions);
454 
455     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
456     SUPERLU_FREE (iperm_c);
457     SUPERLU_FREE (relax_end);
458 
459 }
460