1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5
6 All rights reserved.
7
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11
12 /*! @file cgstrf.c
13 * \brief Computes an LU factorization of a general sparse matrix
14 *
15 * <pre>
16 * -- SuperLU routine (version 3.0) --
17 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18 * and Lawrence Berkeley National Lab.
19 * October 15, 2003
20 *
21 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
22 *
23 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
24 * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
25 *
26 * Permission is hereby granted to use or copy this program for any
27 * purpose, provided the above notices are retained on all copies.
28 * Permission to modify the code and to distribute modified code is
29 * granted, provided the above notices are retained, and a notice that
30 * the code was modified is included with the above copyright notice.
31 * </pre>
32 */
33
34
35 #include "slu_cdefs.h"
36
37 /*! \brief
38 *
39 * <pre>
40 * Purpose
41 * =======
42 *
43 * CGSTRF computes an LU factorization of a general sparse m-by-n
44 * matrix A using partial pivoting with row interchanges.
45 * The factorization has the form
46 * Pr * A = L * U
47 * where Pr is a row permutation matrix, L is lower triangular with unit
48 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
49 * triangular (upper trapezoidal if A->nrow < A->ncol).
50 *
51 * See supermatrix.h for the definition of 'SuperMatrix' structure.
52 *
53 * Arguments
54 * =========
55 *
56 * options (input) superlu_options_t*
57 * The structure defines the input parameters to control
58 * how the LU decomposition will be performed.
59 *
60 * A (input) SuperMatrix*
61 * Original matrix A, permuted by columns, of dimension
62 * (A->nrow, A->ncol). The type of A can be:
63 * Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
64 *
65 * relax (input) int
66 * To control degree of relaxing supernodes. If the number
67 * of nodes (columns) in a subtree of the elimination tree is less
68 * than relax, this subtree is considered as one supernode,
69 * regardless of the row structures of those columns.
70 *
71 * panel_size (input) int
72 * A panel consists of at most panel_size consecutive columns.
73 *
74 * etree (input) int*, dimension (A->ncol)
75 * Elimination tree of A'*A.
76 * Note: etree is a vector of parent pointers for a forest whose
77 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
78 * On input, the columns of A should be permuted so that the
79 * etree is in a certain postorder.
80 *
81 * work (input/output) void*, size (lwork) (in bytes)
82 * User-supplied work space and space for the output data structures.
83 * Not referenced if lwork = 0;
84 *
85 * lwork (input) int
86 * Specifies the size of work array in bytes.
87 * = 0: allocate space internally by system malloc;
88 * > 0: use user-supplied work array of length lwork in bytes,
89 * returns error if space runs out.
90 * = -1: the routine guesses the amount of space needed without
91 * performing the factorization, and returns it in
92 * *info; no other side effects.
93 *
94 * perm_c (input) int*, dimension (A->ncol)
95 * Column permutation vector, which defines the
96 * permutation matrix Pc; perm_c[i] = j means column i of A is
97 * in position j in A*Pc.
98 * When searching for diagonal, perm_c[*] is applied to the
99 * row subscripts of A, so that diagonal threshold pivoting
100 * can find the diagonal of A, rather than that of A*Pc.
101 *
102 * perm_r (input/output) int*, dimension (A->nrow)
103 * Row permutation vector which defines the permutation matrix Pr,
104 * perm_r[i] = j means row i of A is in position j in Pr*A.
105 * If options->Fact == SamePattern_SameRowPerm, the pivoting routine
106 * will try to use the input perm_r, unless a certain threshold
107 * criterion is violated. In that case, perm_r is overwritten by
108 * a new permutation determined by partial pivoting or diagonal
109 * threshold pivoting.
110 * Otherwise, perm_r is output argument;
111 *
112 * L (output) SuperMatrix*
113 * The factor L from the factorization Pr*A=L*U; use compressed row
114 * subscripts storage for supernodes, i.e., L has type:
115 * Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
116 *
117 * U (output) SuperMatrix*
118 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
119 * storage scheme, i.e., U has types: Stype = SLU_NC,
120 * Dtype = SLU_C, Mtype = SLU_TRU.
121 *
122 * Glu (input/output) GlobalLU_t *
123 * If options->Fact == SamePattern_SameRowPerm, it is an input;
124 * The matrix A will be factorized assuming that a
125 * factorization of a matrix with the same sparsity pattern
126 * and similar numerical values was performed prior to this one.
127 * Therefore, this factorization will reuse both row and column
128 * scaling factors R and C, both row and column permutation
129 * vectors perm_r and perm_c, and the L & U data structures
130 * set up from the previous factorization.
131 * Otherwise, it is an output.
132 *
133 * stat (output) SuperLUStat_t*
134 * Record the statistics on runtime and floating-point operation count.
135 * See slu_util.h for the definition of 'SuperLUStat_t'.
136 *
137 * info (output) int*
138 * = 0: successful exit
139 * < 0: if info = -i, the i-th argument had an illegal value
140 * > 0: if info = i, and i is
141 * <= A->ncol: U(i,i) is exactly zero. The factorization has
142 * been completed, but the factor U is exactly singular,
143 * and division by zero will occur if it is used to solve a
144 * system of equations.
145 * > A->ncol: number of bytes allocated when memory allocation
146 * failure occurred, plus A->ncol. If lwork = -1, it is
147 * the estimated amount of space needed, plus A->ncol.
148 *
149 * ======================================================================
150 *
151 * Local Working Arrays:
152 * ======================
153 * m = number of rows in the matrix
154 * n = number of columns in the matrix
155 *
156 * xprune[0:n-1]: xprune[*] points to locations in subscript
157 * vector lsub[*]. For column i, xprune[i] denotes the point where
158 * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
159 * to be traversed for symbolic factorization.
160 *
161 * marker[0:3*m-1]: marker[i] = j means that node i has been
162 * reached when working on column j.
163 * Storage: relative to original row subscripts
164 * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
165 * see cpanel_dfs.c; marker2 is used for inner-factorization,
166 * see ccolumn_dfs.c.
167 *
168 * parent[0:m-1]: parent vector used during dfs
169 * Storage: relative to new row subscripts
170 *
171 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
172 * unexplored neighbor of i in lsub[*]
173 *
174 * segrep[0:nseg-1]: contains the list of supernodal representatives
175 * in topological order of the dfs. A supernode representative is the
176 * last column of a supernode.
177 * The maximum size of segrep[] is n.
178 *
179 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
180 * supernodal representative r, repfnz[r] is the location of the first
181 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
182 * indicates the supernode r has been explored.
183 * NOTE: There are W of them, each used for one column of a panel.
184 *
185 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
186 * the panel diagonal. These are filled in during cpanel_dfs(), and are
187 * used later in the inner LU factorization within the panel.
188 * panel_lsub[]/dense[] pair forms the SPA data structure.
189 * NOTE: There are W of them.
190 *
191 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
192 * NOTE: there are W of them.
193 *
194 * tempv[0:*]: real temporary used for dense numeric kernels;
195 * The size of this array is defined by NUM_TEMPV() in slu_cdefs.h.
196 * </pre>
197 */
198
199 void
cgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)200 cgstrf (superlu_options_t *options, SuperMatrix *A,
201 int relax, int panel_size, int *etree, void *work, int lwork,
202 int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
203 GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
204 SuperLUStat_t *stat, int *info)
205 {
206 /* Local working arrays */
207 NCPformat *Astore;
208 int *iperm_r = NULL; /* inverse of perm_r; used when
209 options->Fact == SamePattern_SameRowPerm */
210 int *iperm_c; /* inverse of perm_c */
211 int *iwork;
212 complex *cwork;
213 int *segrep, *repfnz, *parent, *xplore;
214 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
215 int *xprune;
216 int *marker;
217 complex *dense, *tempv;
218 int *relax_end;
219 complex *a;
220 int *asub;
221 int *xa_begin, *xa_end;
222 int *xsup, *supno;
223 int *xlsub, *xlusup, *xusub;
224 int nzlumax;
225 float fill_ratio = sp_ienv(6); /* estimated fill ratio */
226
227 /* Local scalars */
228 fact_t fact = options->Fact;
229 double diag_pivot_thresh = options->DiagPivotThresh;
230 int pivrow; /* pivotal row number in the original matrix A */
231 int nseg1; /* no of segments in U-column above panel row jcol */
232 int nseg; /* no of segments in each U-column */
233 register int jcol;
234 register int kcol; /* end column of a relaxed snode */
235 register int icol;
236 register int i, k, jj, new_next, iinfo;
237 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
238 int w_def; /* upper bound on panel width */
239 int usepr, iperm_r_allocated = 0;
240 int nnzL, nnzU;
241 int *panel_histo = stat->panel_histo;
242 flops_t *ops = stat->ops;
243
244 iinfo = 0;
245 m = A->nrow;
246 n = A->ncol;
247 min_mn = SUPERLU_MIN(m, n);
248 Astore = A->Store;
249 a = Astore->nzval;
250 asub = Astore->rowind;
251 xa_begin = Astore->colbeg;
252 xa_end = Astore->colend;
253
254 /* Allocate storage common to the factor routines */
255 *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz,
256 panel_size, fill_ratio, L, U, Glu, &iwork, &cwork);
257 if ( *info ) return;
258
259 xsup = Glu->xsup;
260 supno = Glu->supno;
261 xlsub = Glu->xlsub;
262 xlusup = Glu->xlusup;
263 xusub = Glu->xusub;
264
265 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
266 &repfnz, &panel_lsub, &xprune, &marker);
267 cSetRWork(m, panel_size, cwork, &dense, &tempv);
268
269 usepr = (fact == SamePattern_SameRowPerm);
270 if ( usepr ) {
271 /* Compute the inverse of perm_r */
272 iperm_r = (int *) intMalloc(m);
273 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
274 iperm_r_allocated = 1;
275 }
276 iperm_c = (int *) intMalloc(n);
277 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
278
279 /* Identify relaxed snodes */
280 relax_end = (int *) intMalloc(n);
281 if ( options->SymmetricMode == YES ) {
282 heap_relax_snode(n, etree, relax, marker, relax_end);
283 } else {
284 relax_snode(n, etree, relax, marker, relax_end);
285 }
286
287 ifill (perm_r, m, EMPTY);
288 ifill (marker, m * NO_MARKER, EMPTY);
289 supno[0] = -1;
290 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
291 w_def = panel_size;
292
293 /*
294 * Work on one "panel" at a time. A panel is one of the following:
295 * (a) a relaxed supernode at the bottom of the etree, or
296 * (b) panel_size contiguous columns, defined by the user
297 */
298 for (jcol = 0; jcol < min_mn; ) {
299
300 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
301 kcol = relax_end[jcol]; /* end of the relaxed snode */
302 panel_histo[kcol-jcol+1]++;
303
304 /* --------------------------------------
305 * Factorize the relaxed supernode(jcol:kcol)
306 * -------------------------------------- */
307 /* Determine the union of the row structure of the snode */
308 if ( (*info = csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
309 xprune, marker, Glu)) != 0 )
310 return;
311
312 nextu = xusub[jcol];
313 nextlu = xlusup[jcol];
314 jsupno = supno[jcol];
315 fsupc = xsup[jsupno];
316 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
317 nzlumax = Glu->nzlumax;
318 while ( new_next > nzlumax ) {
319 if ( (*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)) )
320 return;
321 }
322
323 for (icol = jcol; icol<= kcol; icol++) {
324 xusub[icol+1] = nextu;
325
326 /* Scatter into SPA dense[*] */
327 for (k = xa_begin[icol]; k < xa_end[icol]; k++)
328 dense[asub[k]] = a[k];
329
330 /* Numeric update within the snode */
331 csnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
332
333 if ( (*info = cpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
334 iperm_r, iperm_c, &pivrow, Glu, stat)) )
335 if ( iinfo == 0 ) iinfo = *info;
336
337 #ifdef DEBUG
338 cprint_lu_col("[1]: ", icol, pivrow, xprune, Glu);
339 #endif
340
341 }
342
343 jcol = icol;
344
345 } else { /* Work on one panel of panel_size columns */
346
347 /* Adjust panel_size so that a panel won't overlap with the next
348 * relaxed snode.
349 */
350 panel_size = w_def;
351 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
352 if ( relax_end[k] != EMPTY ) {
353 panel_size = k - jcol;
354 break;
355 }
356 if ( k == min_mn ) panel_size = min_mn - jcol;
357 panel_histo[panel_size]++;
358
359 /* symbolic factor on a panel of columns */
360 cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
361 dense, panel_lsub, segrep, repfnz, xprune,
362 marker, parent, xplore, Glu);
363
364 /* numeric sup-panel updates in topological order */
365 cpanel_bmod(m, panel_size, jcol, nseg1, dense,
366 tempv, segrep, repfnz, Glu, stat);
367
368 /* Sparse LU within the panel, and below panel diagonal */
369 for ( jj = jcol; jj < jcol + panel_size; jj++) {
370 k = (jj - jcol) * m; /* column index for w-wide arrays */
371
372 nseg = nseg1; /* Begin after all the panel segments */
373
374 if ((*info = ccolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
375 segrep, &repfnz[k], xprune, marker,
376 parent, xplore, Glu)) != 0) return;
377
378 /* Numeric updates */
379 if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
380 tempv, &segrep[nseg1], &repfnz[k],
381 jcol, Glu, stat)) != 0) return;
382
383 /* Copy the U-segments to ucol[*] */
384 if ((*info = ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
385 perm_r, &dense[k], Glu)) != 0)
386 return;
387
388 if ( (*info = cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
389 iperm_r, iperm_c, &pivrow, Glu, stat)) )
390 if ( iinfo == 0 ) iinfo = *info;
391
392 /* Prune columns (0:jj-1) using column jj */
393 cpruneL(jj, perm_r, pivrow, nseg, segrep,
394 &repfnz[k], xprune, Glu);
395
396 /* Reset repfnz[] for this column */
397 resetrep_col (nseg, segrep, &repfnz[k]);
398
399 #ifdef DEBUG
400 cprint_lu_col("[2]: ", jj, pivrow, xprune, Glu);
401 #endif
402
403 }
404
405 jcol += panel_size; /* Move to the next panel */
406
407 } /* else */
408
409 } /* for */
410
411 *info = iinfo;
412
413 if ( m > n ) {
414 k = 0;
415 for (i = 0; i < m; ++i)
416 if ( perm_r[i] == EMPTY ) {
417 perm_r[i] = n + k;
418 ++k;
419 }
420 }
421
422 countnz(min_mn, xprune, &nnzL, &nnzU, Glu);
423 fixupL(min_mn, perm_r, Glu);
424
425 cLUWorkFree(iwork, cwork, Glu); /* Free work space and compress storage */
426
427 if ( fact == SamePattern_SameRowPerm ) {
428 /* L and U structures may have changed due to possibly different
429 pivoting, even though the storage is available.
430 There could also be memory expansions, so the array locations
431 may have changed, */
432 ((SCformat *)L->Store)->nnz = nnzL;
433 ((SCformat *)L->Store)->nsuper = Glu->supno[n];
434 ((SCformat *)L->Store)->nzval = (complex *) Glu->lusup;
435 ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
436 ((SCformat *)L->Store)->rowind = Glu->lsub;
437 ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
438 ((NCformat *)U->Store)->nnz = nnzU;
439 ((NCformat *)U->Store)->nzval = (complex *) Glu->ucol;
440 ((NCformat *)U->Store)->rowind = Glu->usub;
441 ((NCformat *)U->Store)->colptr = Glu->xusub;
442 } else {
443 cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
444 (complex *) Glu->lusup, Glu->xlusup,
445 Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
446 SLU_SC, SLU_C, SLU_TRLU);
447 cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
448 (complex *) Glu->ucol, Glu->usub, Glu->xusub,
449 SLU_NC, SLU_C, SLU_TRU);
450 }
451
452 ops[FACT] += ops[TRSV] + ops[GEMV];
453 stat->expansions = --(Glu->num_expansions);
454
455 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
456 SUPERLU_FREE (iperm_c);
457 SUPERLU_FREE (relax_end);
458
459 }
460