1
2 /*! @file zgstrf.c
3 * \brief Computes an LU factorization of a general sparse matrix
4 *
5 * <pre>
6 * -- SuperLU routine (version 3.0) --
7 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
8 * and Lawrence Berkeley National Lab.
9 * October 15, 2003
10 *
11 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
12 *
13 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14 * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
15 *
16 * Permission is hereby granted to use or copy this program for any
17 * purpose, provided the above notices are retained on all copies.
18 * Permission to modify the code and to distribute modified code is
19 * granted, provided the above notices are retained, and a notice that
20 * the code was modified is included with the above copyright notice.
21 * </pre>
22 */
23
24
25 #include "slu_zdefs.h"
26
27 /*! \brief
28 *
29 * <pre>
30 * Purpose
31 * =======
32 *
33 * ZGSTRF computes an LU factorization of a general sparse m-by-n
34 * matrix A using partial pivoting with row interchanges.
35 * The factorization has the form
36 * Pr * A = L * U
37 * where Pr is a row permutation matrix, L is lower triangular with unit
38 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
39 * triangular (upper trapezoidal if A->nrow < A->ncol).
40 *
41 * See supermatrix.h for the definition of 'SuperMatrix' structure.
42 *
43 * Arguments
44 * =========
45 *
46 * options (input) superlu_options_t*
47 * The structure defines the input parameters to control
48 * how the LU decomposition will be performed.
49 *
50 * A (input) SuperMatrix*
51 * Original matrix A, permuted by columns, of dimension
52 * (A->nrow, A->ncol). The type of A can be:
53 * Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
54 *
55 * relax (input) int
56 * To control degree of relaxing supernodes. If the number
57 * of nodes (columns) in a subtree of the elimination tree is less
58 * than relax, this subtree is considered as one supernode,
59 * regardless of the row structures of those columns.
60 *
61 * panel_size (input) int
62 * A panel consists of at most panel_size consecutive columns.
63 *
64 * etree (input) int*, dimension (A->ncol)
65 * Elimination tree of A'*A.
66 * Note: etree is a vector of parent pointers for a forest whose
67 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
68 * On input, the columns of A should be permuted so that the
69 * etree is in a certain postorder.
70 *
71 * work (input/output) void*, size (lwork) (in bytes)
72 * User-supplied work space and space for the output data structures.
73 * Not referenced if lwork = 0;
74 *
75 * lwork (input) int
76 * Specifies the size of work array in bytes.
77 * = 0: allocate space internally by system malloc;
78 * > 0: use user-supplied work array of length lwork in bytes,
79 * returns error if space runs out.
80 * = -1: the routine guesses the amount of space needed without
81 * performing the factorization, and returns it in
82 * *info; no other side effects.
83 *
84 * perm_c (input) int*, dimension (A->ncol)
85 * Column permutation vector, which defines the
86 * permutation matrix Pc; perm_c[i] = j means column i of A is
87 * in position j in A*Pc.
88 * When searching for diagonal, perm_c[*] is applied to the
89 * row subscripts of A, so that diagonal threshold pivoting
90 * can find the diagonal of A, rather than that of A*Pc.
91 *
92 * perm_r (input/output) int*, dimension (A->nrow)
93 * Row permutation vector which defines the permutation matrix Pr,
94 * perm_r[i] = j means row i of A is in position j in Pr*A.
95 * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
96 * will try to use the input perm_r, unless a certain threshold
97 * criterion is violated. In that case, perm_r is overwritten by
98 * a new permutation determined by partial pivoting or diagonal
99 * threshold pivoting.
100 * Otherwise, perm_r is output argument;
101 *
102 * L (output) SuperMatrix*
103 * The factor L from the factorization Pr*A=L*U; use compressed row
104 * subscripts storage for supernodes, i.e., L has type:
105 * Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
106 *
107 * U (output) SuperMatrix*
108 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
109 * storage scheme, i.e., U has types: Stype = SLU_NC,
110 * Dtype = SLU_Z, Mtype = SLU_TRU.
111 *
112 * stat (output) SuperLUStat_t*
113 * Record the statistics on runtime and floating-point operation count.
114 * See slu_util.h for the definition of 'SuperLUStat_t'.
115 *
116 * info (output) int*
117 * = 0: successful exit
118 * < 0: if info = -i, the i-th argument had an illegal value
119 * > 0: if info = i, and i is
120 * <= A->ncol: U(i,i) is exactly zero. The factorization has
121 * been completed, but the factor U is exactly singular,
122 * and division by zero will occur if it is used to solve a
123 * system of equations.
124 * > A->ncol: number of bytes allocated when memory allocation
125 * failure occurred, plus A->ncol. If lwork = -1, it is
126 * the estimated amount of space needed, plus A->ncol.
127 *
128 * ======================================================================
129 *
130 * Local Working Arrays:
131 * ======================
132 * m = number of rows in the matrix
133 * n = number of columns in the matrix
134 *
135 * xprune[0:n-1]: xprune[*] points to locations in subscript
136 * vector lsub[*]. For column i, xprune[i] denotes the point where
137 * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
138 * to be traversed for symbolic factorization.
139 *
140 * marker[0:3*m-1]: marker[i] = j means that node i has been
141 * reached when working on column j.
142 * Storage: relative to original row subscripts
143 * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
144 * see zpanel_dfs.c; marker2 is used for inner-factorization,
145 * see zcolumn_dfs.c.
146 *
147 * parent[0:m-1]: parent vector used during dfs
148 * Storage: relative to new row subscripts
149 *
150 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
151 * unexplored neighbor of i in lsub[*]
152 *
153 * segrep[0:nseg-1]: contains the list of supernodal representatives
154 * in topological order of the dfs. A supernode representative is the
155 * last column of a supernode.
156 * The maximum size of segrep[] is n.
157 *
158 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
159 * supernodal representative r, repfnz[r] is the location of the first
160 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
161 * indicates the supernode r has been explored.
162 * NOTE: There are W of them, each used for one column of a panel.
163 *
164 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
165 * the panel diagonal. These are filled in during zpanel_dfs(), and are
166 * used later in the inner LU factorization within the panel.
167 * panel_lsub[]/dense[] pair forms the SPA data structure.
168 * NOTE: There are W of them.
169 *
170 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
171 * NOTE: there are W of them.
172 *
173 * tempv[0:*]: real temporary used for dense numeric kernels;
174 * The size of this array is defined by NUM_TEMPV() in slu_zdefs.h.
175 * </pre>
176 */
177
178 void
zgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,SuperLUStat_t * stat,int * info)179 zgstrf (superlu_options_t *options, SuperMatrix *A,
180 int relax, int panel_size, int *etree, void *work, int lwork,
181 int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
182 SuperLUStat_t *stat, int *info)
183 {
184 /* Local working arrays */
185 NCPformat *Astore;
186 int *iperm_r = NULL; /* inverse of perm_r; used when
187 options->Fact == SamePattern_SameRowPerm */
188 int *iperm_c; /* inverse of perm_c */
189 int *iwork;
190 doublecomplex *zwork;
191 int *segrep, *repfnz, *parent, *xplore;
192 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
193 int *xprune;
194 int *marker;
195 doublecomplex *dense, *tempv;
196 int *relax_end;
197 doublecomplex *a;
198 int *asub;
199 int *xa_begin, *xa_end;
200 int *xsup, *supno;
201 int *xlsub, *xlusup, *xusub;
202 int nzlumax;
203 double fill_ratio = sp_ienv(6); /* estimated fill ratio */
204 static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
205
206 /* Local scalars */
207 fact_t fact = options->Fact;
208 double diag_pivot_thresh = options->DiagPivotThresh;
209 int pivrow; /* pivotal row number in the original matrix A */
210 int nseg1; /* no of segments in U-column above panel row jcol */
211 int nseg; /* no of segments in each U-column */
212 register int jcol;
213 register int kcol; /* end column of a relaxed snode */
214 register int icol;
215 register int i, k, jj, new_next, iinfo;
216 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
217 int w_def; /* upper bound on panel width */
218 int usepr, iperm_r_allocated = 0;
219 int nnzL, nnzU;
220 int *panel_histo = stat->panel_histo;
221 flops_t *ops = stat->ops;
222
223 iinfo = 0;
224 m = A->nrow;
225 n = A->ncol;
226 min_mn = SUPERLU_MIN(m, n);
227 Astore = A->Store;
228 a = Astore->nzval;
229 asub = Astore->rowind;
230 xa_begin = Astore->colbeg;
231 xa_end = Astore->colend;
232
233 /* Allocate storage common to the factor routines */
234 *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz,
235 panel_size, fill_ratio, L, U, &Glu, &iwork, &zwork);
236 if ( *info ) return;
237
238 xsup = Glu.xsup;
239 supno = Glu.supno;
240 xlsub = Glu.xlsub;
241 xlusup = Glu.xlusup;
242 xusub = Glu.xusub;
243
244 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
245 &repfnz, &panel_lsub, &xprune, &marker);
246 zSetRWork(m, panel_size, zwork, &dense, &tempv);
247
248 usepr = (fact == SamePattern_SameRowPerm);
249 if ( usepr ) {
250 /* Compute the inverse of perm_r */
251 iperm_r = (int *) intMalloc(m);
252 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
253 iperm_r_allocated = 1;
254 }
255 iperm_c = (int *) intMalloc(n);
256 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
257
258 /* Identify relaxed snodes */
259 relax_end = (int *) intMalloc(n);
260 if ( options->SymmetricMode == YES ) {
261 heap_relax_snode(n, etree, relax, marker, relax_end);
262 } else {
263 relax_snode(n, etree, relax, marker, relax_end);
264 }
265
266 ifill (perm_r, m, EMPTY);
267 ifill (marker, m * NO_MARKER, EMPTY);
268 supno[0] = -1;
269 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
270 w_def = panel_size;
271
272 /*
273 * Work on one "panel" at a time. A panel is one of the following:
274 * (a) a relaxed supernode at the bottom of the etree, or
275 * (b) panel_size contiguous columns, defined by the user
276 */
277 for (jcol = 0; jcol < min_mn; ) {
278
279 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
280 kcol = relax_end[jcol]; /* end of the relaxed snode */
281 panel_histo[kcol-jcol+1]++;
282
283 /* --------------------------------------
284 * Factorize the relaxed supernode(jcol:kcol)
285 * -------------------------------------- */
286 /* Determine the union of the row structure of the snode */
287 if ( (*info = zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
288 xprune, marker, &Glu)) != 0 )
289 return;
290
291 nextu = xusub[jcol];
292 nextlu = xlusup[jcol];
293 jsupno = supno[jcol];
294 fsupc = xsup[jsupno];
295 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
296 nzlumax = Glu.nzlumax;
297 while ( new_next > nzlumax ) {
298 if ( (*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
299 return;
300 }
301
302 for (icol = jcol; icol<= kcol; icol++) {
303 xusub[icol+1] = nextu;
304
305 /* Scatter into SPA dense[*] */
306 for (k = xa_begin[icol]; k < xa_end[icol]; k++)
307 dense[asub[k]] = a[k];
308
309 /* Numeric update within the snode */
310 zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
311
312 if ( (*info = zpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
313 iperm_r, iperm_c, &pivrow, &Glu, stat)) )
314 if ( iinfo == 0 ) iinfo = *info;
315
316 #ifdef DEBUG
317 zprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
318 #endif
319
320 }
321
322 jcol = icol;
323
324 } else { /* Work on one panel of panel_size columns */
325
326 /* Adjust panel_size so that a panel won't overlap with the next
327 * relaxed snode.
328 */
329 panel_size = w_def;
330 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
331 if ( relax_end[k] != EMPTY ) {
332 panel_size = k - jcol;
333 break;
334 }
335 if ( k == min_mn ) panel_size = min_mn - jcol;
336 panel_histo[panel_size]++;
337
338 /* symbolic factor on a panel of columns */
339 zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
340 dense, panel_lsub, segrep, repfnz, xprune,
341 marker, parent, xplore, &Glu);
342
343 /* numeric sup-panel updates in topological order */
344 zpanel_bmod(m, panel_size, jcol, nseg1, dense,
345 tempv, segrep, repfnz, &Glu, stat);
346
347 /* Sparse LU within the panel, and below panel diagonal */
348 for ( jj = jcol; jj < jcol + panel_size; jj++) {
349 k = (jj - jcol) * m; /* column index for w-wide arrays */
350
351 nseg = nseg1; /* Begin after all the panel segments */
352
353 if ((*info = zcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
354 segrep, &repfnz[k], xprune, marker,
355 parent, xplore, &Glu)) != 0) return;
356
357 /* Numeric updates */
358 if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k],
359 tempv, &segrep[nseg1], &repfnz[k],
360 jcol, &Glu, stat)) != 0) return;
361
362 /* Copy the U-segments to ucol[*] */
363 if ((*info = zcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
364 perm_r, &dense[k], &Glu)) != 0)
365 return;
366
367 if ( (*info = zpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
368 iperm_r, iperm_c, &pivrow, &Glu, stat)) )
369 if ( iinfo == 0 ) iinfo = *info;
370
371 /* Prune columns (0:jj-1) using column jj */
372 zpruneL(jj, perm_r, pivrow, nseg, segrep,
373 &repfnz[k], xprune, &Glu);
374
375 /* Reset repfnz[] for this column */
376 resetrep_col (nseg, segrep, &repfnz[k]);
377
378 #ifdef DEBUG
379 zprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
380 #endif
381
382 }
383
384 jcol += panel_size; /* Move to the next panel */
385
386 } /* else */
387
388 } /* for */
389
390 *info = iinfo;
391
392 if ( m > n ) {
393 k = 0;
394 for (i = 0; i < m; ++i)
395 if ( perm_r[i] == EMPTY ) {
396 perm_r[i] = n + k;
397 ++k;
398 }
399 }
400
401 countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
402 fixupL(min_mn, perm_r, &Glu);
403
404 zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */
405
406 if ( fact == SamePattern_SameRowPerm ) {
407 /* L and U structures may have changed due to possibly different
408 pivoting, even though the storage is available.
409 There could also be memory expansions, so the array locations
410 may have changed, */
411 ((SCformat *)L->Store)->nnz = nnzL;
412 ((SCformat *)L->Store)->nsuper = Glu.supno[n];
413 ((SCformat *)L->Store)->nzval = Glu.lusup;
414 ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
415 ((SCformat *)L->Store)->rowind = Glu.lsub;
416 ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
417 ((NCformat *)U->Store)->nnz = nnzU;
418 ((NCformat *)U->Store)->nzval = Glu.ucol;
419 ((NCformat *)U->Store)->rowind = Glu.usub;
420 ((NCformat *)U->Store)->colptr = Glu.xusub;
421 } else {
422 zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
423 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
424 Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU);
425 zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
426 Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU);
427 }
428
429 ops[FACT] += ops[TRSV] + ops[GEMV];
430 stat->expansions = --(Glu.num_expansions);
431
432 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
433 SUPERLU_FREE (iperm_c);
434 SUPERLU_FREE (relax_end);
435
436 }
437