1 
2 /*! @file zgstrf.c
3  * \brief Computes an LU factorization of a general sparse matrix
4  *
5  * <pre>
6  * -- SuperLU routine (version 3.0) --
7  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
8  * and Lawrence Berkeley National Lab.
9  * October 15, 2003
10  *
11  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
12  *
13  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
15  *
16  * Permission is hereby granted to use or copy this program for any
17  * purpose, provided the above notices are retained on all copies.
18  * Permission to modify the code and to distribute modified code is
19  * granted, provided the above notices are retained, and a notice that
20  * the code was modified is included with the above copyright notice.
21  * </pre>
22  */
23 
24 
25 #include "slu_zdefs.h"
26 
27 /*! \brief
28  *
29  * <pre>
30  * Purpose
31  * =======
32  *
33  * ZGSTRF computes an LU factorization of a general sparse m-by-n
34  * matrix A using partial pivoting with row interchanges.
35  * The factorization has the form
36  *     Pr * A = L * U
37  * where Pr is a row permutation matrix, L is lower triangular with unit
38  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
39  * triangular (upper trapezoidal if A->nrow < A->ncol).
40  *
41  * See supermatrix.h for the definition of 'SuperMatrix' structure.
42  *
43  * Arguments
44  * =========
45  *
46  * options (input) superlu_options_t*
47  *         The structure defines the input parameters to control
48  *         how the LU decomposition will be performed.
49  *
50  * A        (input) SuperMatrix*
51  *	    Original matrix A, permuted by columns, of dimension
52  *          (A->nrow, A->ncol). The type of A can be:
53  *          Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
54  *
55  * relax    (input) int
56  *          To control degree of relaxing supernodes. If the number
57  *          of nodes (columns) in a subtree of the elimination tree is less
58  *          than relax, this subtree is considered as one supernode,
59  *          regardless of the row structures of those columns.
60  *
61  * panel_size (input) int
62  *          A panel consists of at most panel_size consecutive columns.
63  *
64  * etree    (input) int*, dimension (A->ncol)
65  *          Elimination tree of A'*A.
66  *          Note: etree is a vector of parent pointers for a forest whose
67  *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
68  *          On input, the columns of A should be permuted so that the
69  *          etree is in a certain postorder.
70  *
71  * work     (input/output) void*, size (lwork) (in bytes)
72  *          User-supplied work space and space for the output data structures.
73  *          Not referenced if lwork = 0;
74  *
75  * lwork   (input) int
76  *         Specifies the size of work array in bytes.
77  *         = 0:  allocate space internally by system malloc;
78  *         > 0:  use user-supplied work array of length lwork in bytes,
79  *               returns error if space runs out.
80  *         = -1: the routine guesses the amount of space needed without
81  *               performing the factorization, and returns it in
82  *               *info; no other side effects.
83  *
84  * perm_c   (input) int*, dimension (A->ncol)
85  *	    Column permutation vector, which defines the
86  *          permutation matrix Pc; perm_c[i] = j means column i of A is
87  *          in position j in A*Pc.
88  *          When searching for diagonal, perm_c[*] is applied to the
89  *          row subscripts of A, so that diagonal threshold pivoting
90  *          can find the diagonal of A, rather than that of A*Pc.
91  *
92  * perm_r   (input/output) int*, dimension (A->nrow)
93  *          Row permutation vector which defines the permutation matrix Pr,
94  *          perm_r[i] = j means row i of A is in position j in Pr*A.
95  *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
96  *             will try to use the input perm_r, unless a certain threshold
97  *             criterion is violated. In that case, perm_r is overwritten by
98  *             a new permutation determined by partial pivoting or diagonal
99  *             threshold pivoting.
100  *          Otherwise, perm_r is output argument;
101  *
102  * L        (output) SuperMatrix*
103  *          The factor L from the factorization Pr*A=L*U; use compressed row
104  *          subscripts storage for supernodes, i.e., L has type:
105  *          Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
106  *
107  * U        (output) SuperMatrix*
108  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
109  *          storage scheme, i.e., U has types: Stype = SLU_NC,
110  *          Dtype = SLU_Z, Mtype = SLU_TRU.
111  *
112  * stat     (output) SuperLUStat_t*
113  *          Record the statistics on runtime and floating-point operation count.
114  *          See slu_util.h for the definition of 'SuperLUStat_t'.
115  *
116  * info     (output) int*
117  *          = 0: successful exit
118  *          < 0: if info = -i, the i-th argument had an illegal value
119  *          > 0: if info = i, and i is
120  *             <= A->ncol: U(i,i) is exactly zero. The factorization has
121  *                been completed, but the factor U is exactly singular,
122  *                and division by zero will occur if it is used to solve a
123  *                system of equations.
124  *             > A->ncol: number of bytes allocated when memory allocation
125  *                failure occurred, plus A->ncol. If lwork = -1, it is
126  *                the estimated amount of space needed, plus A->ncol.
127  *
128  * ======================================================================
129  *
130  * Local Working Arrays:
131  * ======================
132  *   m = number of rows in the matrix
133  *   n = number of columns in the matrix
134  *
135  *   xprune[0:n-1]: xprune[*] points to locations in subscript
136  *	vector lsub[*]. For column i, xprune[i] denotes the point where
137  *	structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
138  *	to be traversed for symbolic factorization.
139  *
140  *   marker[0:3*m-1]: marker[i] = j means that node i has been
141  *	reached when working on column j.
142  *	Storage: relative to original row subscripts
143  *	NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
144  *	      see zpanel_dfs.c; marker2 is used for inner-factorization,
145  *            see zcolumn_dfs.c.
146  *
147  *   parent[0:m-1]: parent vector used during dfs
148  *      Storage: relative to new row subscripts
149  *
150  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
151  *	unexplored neighbor of i in lsub[*]
152  *
153  *   segrep[0:nseg-1]: contains the list of supernodal representatives
154  *	in topological order of the dfs. A supernode representative is the
155  *	last column of a supernode.
156  *      The maximum size of segrep[] is n.
157  *
158  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
159  *	supernodal representative r, repfnz[r] is the location of the first
160  *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
161  *	indicates the supernode r has been explored.
162  *	NOTE: There are W of them, each used for one column of a panel.
163  *
164  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
165  *      the panel diagonal. These are filled in during zpanel_dfs(), and are
166  *      used later in the inner LU factorization within the panel.
167  *	panel_lsub[]/dense[] pair forms the SPA data structure.
168  *	NOTE: There are W of them.
169  *
170  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
171  *	    	   NOTE: there are W of them.
172  *
173  *   tempv[0:*]: real temporary used for dense numeric kernels;
174  *	The size of this array is defined by NUM_TEMPV() in slu_zdefs.h.
175  * </pre>
176  */
177 
178 void
zgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,SuperLUStat_t * stat,int * info)179 zgstrf (superlu_options_t *options, SuperMatrix *A,
180         int relax, int panel_size, int *etree, void *work, int lwork,
181         int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
182         SuperLUStat_t *stat, int *info)
183 {
184     /* Local working arrays */
185     NCPformat *Astore;
186     int       *iperm_r = NULL; /* inverse of perm_r; used when
187                                   options->Fact == SamePattern_SameRowPerm */
188     int       *iperm_c; /* inverse of perm_c */
189     int       *iwork;
190     doublecomplex    *zwork;
191     int	      *segrep, *repfnz, *parent, *xplore;
192     int	      *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
193     int	      *xprune;
194     int	      *marker;
195     doublecomplex    *dense, *tempv;
196     int       *relax_end;
197     doublecomplex    *a;
198     int       *asub;
199     int       *xa_begin, *xa_end;
200     int       *xsup, *supno;
201     int       *xlsub, *xlusup, *xusub;
202     int       nzlumax;
203     double fill_ratio = sp_ienv(6);  /* estimated fill ratio */
204     static    GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
205 
206     /* Local scalars */
207     fact_t    fact = options->Fact;
208     double    diag_pivot_thresh = options->DiagPivotThresh;
209     int       pivrow;   /* pivotal row number in the original matrix A */
210     int       nseg1;	/* no of segments in U-column above panel row jcol */
211     int       nseg;	/* no of segments in each U-column */
212     register int jcol;
213     register int kcol;	/* end column of a relaxed snode */
214     register int icol;
215     register int i, k, jj, new_next, iinfo;
216     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
217     int       w_def;	/* upper bound on panel width */
218     int       usepr, iperm_r_allocated = 0;
219     int       nnzL, nnzU;
220     int       *panel_histo = stat->panel_histo;
221     flops_t   *ops = stat->ops;
222 
223     iinfo    = 0;
224     m        = A->nrow;
225     n        = A->ncol;
226     min_mn   = SUPERLU_MIN(m, n);
227     Astore   = A->Store;
228     a        = Astore->nzval;
229     asub     = Astore->rowind;
230     xa_begin = Astore->colbeg;
231     xa_end   = Astore->colend;
232 
233     /* Allocate storage common to the factor routines */
234     *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz,
235                        panel_size, fill_ratio, L, U, &Glu, &iwork, &zwork);
236     if ( *info ) return;
237 
238     xsup    = Glu.xsup;
239     supno   = Glu.supno;
240     xlsub   = Glu.xlsub;
241     xlusup  = Glu.xlusup;
242     xusub   = Glu.xusub;
243 
244     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
245 	     &repfnz, &panel_lsub, &xprune, &marker);
246     zSetRWork(m, panel_size, zwork, &dense, &tempv);
247 
248     usepr = (fact == SamePattern_SameRowPerm);
249     if ( usepr ) {
250 	/* Compute the inverse of perm_r */
251 	iperm_r = (int *) intMalloc(m);
252 	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
253 	iperm_r_allocated = 1;
254     }
255     iperm_c = (int *) intMalloc(n);
256     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
257 
258     /* Identify relaxed snodes */
259     relax_end = (int *) intMalloc(n);
260     if ( options->SymmetricMode == YES ) {
261         heap_relax_snode(n, etree, relax, marker, relax_end);
262     } else {
263         relax_snode(n, etree, relax, marker, relax_end);
264     }
265 
266     ifill (perm_r, m, EMPTY);
267     ifill (marker, m * NO_MARKER, EMPTY);
268     supno[0] = -1;
269     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
270     w_def    = panel_size;
271 
272     /*
273      * Work on one "panel" at a time. A panel is one of the following:
274      *	   (a) a relaxed supernode at the bottom of the etree, or
275      *	   (b) panel_size contiguous columns, defined by the user
276      */
277     for (jcol = 0; jcol < min_mn; ) {
278 
279 	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
280    	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
281 	    panel_histo[kcol-jcol+1]++;
282 
283 	    /* --------------------------------------
284 	     * Factorize the relaxed supernode(jcol:kcol)
285 	     * -------------------------------------- */
286 	    /* Determine the union of the row structure of the snode */
287 	    if ( (*info = zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
288 				    xprune, marker, &Glu)) != 0 )
289 		return;
290 
291             nextu    = xusub[jcol];
292 	    nextlu   = xlusup[jcol];
293 	    jsupno   = supno[jcol];
294 	    fsupc    = xsup[jsupno];
295 	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
296 	    nzlumax = Glu.nzlumax;
297 	    while ( new_next > nzlumax ) {
298 		if ( (*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
299 		    return;
300 	    }
301 
302 	    for (icol = jcol; icol<= kcol; icol++) {
303 		xusub[icol+1] = nextu;
304 
305     		/* Scatter into SPA dense[*] */
306     		for (k = xa_begin[icol]; k < xa_end[icol]; k++)
307         	    dense[asub[k]] = a[k];
308 
309 	       	/* Numeric update within the snode */
310 	        zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
311 
312 		if ( (*info = zpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
313 				      iperm_r, iperm_c, &pivrow, &Glu, stat)) )
314 		    if ( iinfo == 0 ) iinfo = *info;
315 
316 #ifdef DEBUG
317 		zprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
318 #endif
319 
320 	    }
321 
322 	    jcol = icol;
323 
324 	} else { /* Work on one panel of panel_size columns */
325 
326 	    /* Adjust panel_size so that a panel won't overlap with the next
327 	     * relaxed snode.
328 	     */
329 	    panel_size = w_def;
330 	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
331 		if ( relax_end[k] != EMPTY ) {
332 		    panel_size = k - jcol;
333 		    break;
334 		}
335 	    if ( k == min_mn ) panel_size = min_mn - jcol;
336 	    panel_histo[panel_size]++;
337 
338 	    /* symbolic factor on a panel of columns */
339 	    zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
340 		      dense, panel_lsub, segrep, repfnz, xprune,
341 		      marker, parent, xplore, &Glu);
342 
343 	    /* numeric sup-panel updates in topological order */
344 	    zpanel_bmod(m, panel_size, jcol, nseg1, dense,
345 		        tempv, segrep, repfnz, &Glu, stat);
346 
347 	    /* Sparse LU within the panel, and below panel diagonal */
348     	    for ( jj = jcol; jj < jcol + panel_size; jj++) {
349  		k = (jj - jcol) * m; /* column index for w-wide arrays */
350 
351 		nseg = nseg1;	/* Begin after all the panel segments */
352 
353 	    	if ((*info = zcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
354 					segrep, &repfnz[k], xprune, marker,
355 					parent, xplore, &Glu)) != 0) return;
356 
357 	      	/* Numeric updates */
358 	    	if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k],
359 					 tempv, &segrep[nseg1], &repfnz[k],
360 					 jcol, &Glu, stat)) != 0) return;
361 
362 	        /* Copy the U-segments to ucol[*] */
363 		if ((*info = zcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
364 					  perm_r, &dense[k], &Glu)) != 0)
365 		    return;
366 
367 	    	if ( (*info = zpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
368 				      iperm_r, iperm_c, &pivrow, &Glu, stat)) )
369 		    if ( iinfo == 0 ) iinfo = *info;
370 
371 		/* Prune columns (0:jj-1) using column jj */
372 	    	zpruneL(jj, perm_r, pivrow, nseg, segrep,
373                         &repfnz[k], xprune, &Glu);
374 
375 		/* Reset repfnz[] for this column */
376 	    	resetrep_col (nseg, segrep, &repfnz[k]);
377 
378 #ifdef DEBUG
379 		zprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
380 #endif
381 
382 	    }
383 
384    	    jcol += panel_size;	/* Move to the next panel */
385 
386 	} /* else */
387 
388     } /* for */
389 
390     *info = iinfo;
391 
392     if ( m > n ) {
393 	k = 0;
394         for (i = 0; i < m; ++i)
395             if ( perm_r[i] == EMPTY ) {
396     		perm_r[i] = n + k;
397 		++k;
398 	    }
399     }
400 
401     countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
402     fixupL(min_mn, perm_r, &Glu);
403 
404     zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */
405 
406     if ( fact == SamePattern_SameRowPerm ) {
407         /* L and U structures may have changed due to possibly different
408 	   pivoting, even though the storage is available.
409 	   There could also be memory expansions, so the array locations
410            may have changed, */
411         ((SCformat *)L->Store)->nnz = nnzL;
412 	((SCformat *)L->Store)->nsuper = Glu.supno[n];
413 	((SCformat *)L->Store)->nzval = Glu.lusup;
414 	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
415 	((SCformat *)L->Store)->rowind = Glu.lsub;
416 	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
417 	((NCformat *)U->Store)->nnz = nnzU;
418 	((NCformat *)U->Store)->nzval = Glu.ucol;
419 	((NCformat *)U->Store)->rowind = Glu.usub;
420 	((NCformat *)U->Store)->colptr = Glu.xusub;
421     } else {
422         zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
423 	                         Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
424 			         Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU);
425     	zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
426 			       Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU);
427     }
428 
429     ops[FACT] += ops[TRSV] + ops[GEMV];
430     stat->expansions = --(Glu.num_expansions);
431 
432     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
433     SUPERLU_FREE (iperm_c);
434     SUPERLU_FREE (relax_end);
435 
436 }
437