1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5
6 All rights reserved.
7
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11
12 /*! @file dgstrf.c
13 * \brief Computes an LU factorization of a general sparse matrix
14 *
15 * <pre>
16 * -- SuperLU routine (version 3.0) --
17 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18 * and Lawrence Berkeley National Lab.
19 * October 15, 2003
20 *
21 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
22 *
23 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
24 * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
25 *
26 * Permission is hereby granted to use or copy this program for any
27 * purpose, provided the above notices are retained on all copies.
28 * Permission to modify the code and to distribute modified code is
29 * granted, provided the above notices are retained, and a notice that
30 * the code was modified is included with the above copyright notice.
31 * </pre>
32 */
33
34
35 #include "slu_ddefs.h"
36 #include "supermatrix.h"
37 #include "slu_util.h"
38
39 /*! \brief
40 *
41 * <pre>
42 * Purpose
43 * =======
44 *
45 * DGSTRF computes an LU factorization of a general sparse m-by-n
46 * matrix A using partial pivoting with row interchanges.
47 * The factorization has the form
48 * Pr * A = L * U
49 * where Pr is a row permutation matrix, L is lower triangular with unit
50 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
51 * triangular (upper trapezoidal if A->nrow < A->ncol).
52 *
53 * See supermatrix.h for the definition of 'SuperMatrix' structure.
54 *
55 * Arguments
56 * =========
57 *
58 * options (input) superlu_options_t*
59 * The structure defines the input parameters to control
60 * how the LU decomposition will be performed.
61 *
62 * A (input) SuperMatrix*
63 * Original matrix A, permuted by columns, of dimension
64 * (A->nrow, A->ncol). The type of A can be:
65 * Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
66 *
67 * relax (input) int
68 * To control degree of relaxing supernodes. If the number
69 * of nodes (columns) in a subtree of the elimination tree is less
70 * than relax, this subtree is considered as one supernode,
71 * regardless of the row structures of those columns.
72 *
73 * panel_size (input) int
74 * A panel consists of at most panel_size consecutive columns.
75 *
76 * etree (input) int*, dimension (A->ncol)
77 * Elimination tree of A'*A.
78 * Note: etree is a vector of parent pointers for a forest whose
79 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
80 * On input, the columns of A should be permuted so that the
81 * etree is in a certain postorder.
82 *
83 * work (input/output) void*, size (lwork) (in bytes)
84 * User-supplied work space and space for the output data structures.
85 * Not referenced if lwork = 0;
86 *
87 * lwork (input) int
88 * Specifies the size of work array in bytes.
89 * = 0: allocate space internally by system malloc;
90 * > 0: use user-supplied work array of length lwork in bytes,
91 * returns error if space runs out.
92 * = -1: the routine guesses the amount of space needed without
93 * performing the factorization, and returns it in
94 * *info; no other side effects.
95 *
96 * perm_c (input) int*, dimension (A->ncol)
97 * Column permutation vector, which defines the
98 * permutation matrix Pc; perm_c[i] = j means column i of A is
99 * in position j in A*Pc.
100 * When searching for diagonal, perm_c[*] is applied to the
101 * row subscripts of A, so that diagonal threshold pivoting
102 * can find the diagonal of A, rather than that of A*Pc.
103 *
104 * perm_r (input/output) int*, dimension (A->nrow)
105 * Row permutation vector which defines the permutation matrix Pr,
106 * perm_r[i] = j means row i of A is in position j in Pr*A.
107 * If options->Fact == SamePattern_SameRowPerm, the pivoting routine
108 * will try to use the input perm_r, unless a certain threshold
109 * criterion is violated. In that case, perm_r is overwritten by
110 * a new permutation determined by partial pivoting or diagonal
111 * threshold pivoting.
112 * Otherwise, perm_r is output argument;
113 *
114 * L (output) SuperMatrix*
115 * The factor L from the factorization Pr*A=L*U; use compressed row
116 * subscripts storage for supernodes, i.e., L has type:
117 * Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
118 *
119 * U (output) SuperMatrix*
120 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
121 * storage scheme, i.e., U has types: Stype = SLU_NC,
122 * Dtype = SLU_D, Mtype = SLU_TRU.
123 *
124 * Glu (input/output) GlobalLU_t *
125 * If options->Fact == SamePattern_SameRowPerm, it is an input;
126 * The matrix A will be factorized assuming that a
127 * factorization of a matrix with the same sparsity pattern
128 * and similar numerical values was performed prior to this one.
129 * Therefore, this factorization will reuse both row and column
130 * scaling factors R and C, both row and column permutation
131 * vectors perm_r and perm_c, and the L & U data structures
132 * set up from the previous factorization.
133 * Otherwise, it is an output.
134 *
135 * stat (output) SuperLUStat_t*
136 * Record the statistics on runtime and floating-point operation count.
137 * See slu_util.h for the definition of 'SuperLUStat_t'.
138 *
139 * info (output) int*
140 * = 0: successful exit
141 * < 0: if info = -i, the i-th argument had an illegal value
142 * > 0: if info = i, and i is
143 * <= A->ncol: U(i,i) is exactly zero. The factorization has
144 * been completed, but the factor U is exactly singular,
145 * and division by zero will occur if it is used to solve a
146 * system of equations.
147 * > A->ncol: number of bytes allocated when memory allocation
148 * failure occurred, plus A->ncol. If lwork = -1, it is
149 * the estimated amount of space needed, plus A->ncol.
150 *
151 * ======================================================================
152 *
153 * Local Working Arrays:
154 * ======================
155 * m = number of rows in the matrix
156 * n = number of columns in the matrix
157 *
158 * xprune[0:n-1]: xprune[*] points to locations in subscript
159 * vector lsub[*]. For column i, xprune[i] denotes the point where
160 * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
161 * to be traversed for symbolic factorization.
162 *
163 * marker[0:3*m-1]: marker[i] = j means that node i has been
164 * reached when working on column j.
165 * Storage: relative to original row subscripts
166 * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
167 * see dpanel_dfs.c; marker2 is used for inner-factorization,
168 * see dcolumn_dfs.c.
169 *
170 * parent[0:m-1]: parent vector used during dfs
171 * Storage: relative to new row subscripts
172 *
173 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
174 * unexplored neighbor of i in lsub[*]
175 *
176 * segrep[0:nseg-1]: contains the list of supernodal representatives
177 * in topological order of the dfs. A supernode representative is the
178 * last column of a supernode.
179 * The maximum size of segrep[] is n.
180 *
181 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
182 * supernodal representative r, repfnz[r] is the location of the first
183 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
184 * indicates the supernode r has been explored.
185 * NOTE: There are W of them, each used for one column of a panel.
186 *
187 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
188 * the panel diagonal. These are filled in during dpanel_dfs(), and are
189 * used later in the inner LU factorization within the panel.
190 * panel_lsub[]/dense[] pair forms the SPA data structure.
191 * NOTE: There are W of them.
192 *
193 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
194 * NOTE: there are W of them.
195 *
196 * tempv[0:*]: real temporary used for dense numeric kernels;
197 * The size of this array is defined by NUM_TEMPV() in slu_ddefs.h.
198 * </pre>
199 */
200
201 void
dgstrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)202 dgstrf (superlu_options_t *options, SuperMatrix *A,
203 int relax, int panel_size, int *etree, void *work, int lwork,
204 int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
205 GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
206 SuperLUStat_t *stat, int *info)
207 {
208 /* Local working arrays */
209 NCPformat *Astore;
210 int *iperm_r = NULL; /* inverse of perm_r; used when
211 options->Fact == SamePattern_SameRowPerm */
212 int *iperm_c; /* inverse of perm_c */
213 int *iwork;
214 double *dwork;
215 int *segrep, *repfnz, *parent, *xplore;
216 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
217 int *xprune;
218 int *marker;
219 double *dense, *tempv;
220 int *relax_end;
221 double *a;
222 int *asub;
223 int *xa_begin, *xa_end;
224 int *xsup, *supno;
225 int *xlsub, *xlusup, *xusub;
226 int nzlumax;
227 double fill_ratio = sp_ienv(6); /* estimated fill ratio */
228
229 /* Local scalars */
230 fact_t fact = options->Fact;
231 double diag_pivot_thresh = options->DiagPivotThresh;
232 int pivrow; /* pivotal row number in the original matrix A */
233 int nseg1; /* no of segments in U-column above panel row jcol */
234 int nseg; /* no of segments in each U-column */
235 register int jcol;
236 register int kcol; /* end column of a relaxed snode */
237 register int icol;
238 register int i, k, jj, new_next, iinfo;
239 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
240 int w_def; /* upper bound on panel width */
241 int usepr, iperm_r_allocated = 0;
242 int nnzL, nnzU;
243 int *panel_histo = stat->panel_histo;
244 flops_t *ops = stat->ops;
245
246 iinfo = 0;
247 m = A->nrow;
248 n = A->ncol;
249 min_mn = SUPERLU_MIN(m, n);
250 Astore = A->Store;
251 a = Astore->nzval;
252 asub = Astore->rowind;
253 xa_begin = Astore->colbeg;
254 xa_end = Astore->colend;
255
256 /* Allocate storage common to the factor routines */
257 *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz,
258 panel_size, fill_ratio, L, U, Glu, &iwork, &dwork);
259 if ( *info ) return;
260
261 xsup = Glu->xsup;
262 supno = Glu->supno;
263 xlsub = Glu->xlsub;
264 xlusup = Glu->xlusup;
265 xusub = Glu->xusub;
266
267 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
268 &repfnz, &panel_lsub, &xprune, &marker);
269 dSetRWork(m, panel_size, dwork, &dense, &tempv);
270
271 usepr = (fact == SamePattern_SameRowPerm);
272 if ( usepr ) {
273 /* Compute the inverse of perm_r */
274 iperm_r = (int *) intMalloc(m);
275 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
276 iperm_r_allocated = 1;
277 }
278 iperm_c = (int *) intMalloc(n);
279 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
280
281 /* Identify relaxed snodes */
282 relax_end = (int *) intMalloc(n);
283 if ( options->SymmetricMode == YES ) {
284 heap_relax_snode(n, etree, relax, marker, relax_end);
285 } else {
286 relax_snode(n, etree, relax, marker, relax_end);
287 }
288
289 ifill (perm_r, m, EMPTY);
290 ifill (marker, m * NO_MARKER, EMPTY);
291 supno[0] = -1;
292 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
293 w_def = panel_size;
294
295 /*
296 * Work on one "panel" at a time. A panel is one of the following:
297 * (a) a relaxed supernode at the bottom of the etree, or
298 * (b) panel_size contiguous columns, defined by the user
299 */
300 for (jcol = 0; jcol < min_mn; ) {
301
302 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
303 kcol = relax_end[jcol]; /* end of the relaxed snode */
304 panel_histo[kcol-jcol+1]++;
305
306 /* --------------------------------------
307 * Factorize the relaxed supernode(jcol:kcol)
308 * -------------------------------------- */
309 /* Determine the union of the row structure of the snode */
310 if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
311 xprune, marker, Glu)) != 0 )
312 return;
313
314 nextu = xusub[jcol];
315 nextlu = xlusup[jcol];
316 jsupno = supno[jcol];
317 fsupc = xsup[jsupno];
318 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
319 nzlumax = Glu->nzlumax;
320 while ( new_next > nzlumax ) {
321 if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)) )
322 return;
323 }
324
325 for (icol = jcol; icol<= kcol; icol++) {
326 xusub[icol+1] = nextu;
327
328 /* Scatter into SPA dense[*] */
329 for (k = xa_begin[icol]; k < xa_end[icol]; k++)
330 dense[asub[k]] = a[k];
331
332 /* Numeric update within the snode */
333 dsnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
334
335 if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
336 iperm_r, iperm_c, &pivrow, Glu, stat)) )
337 if ( iinfo == 0 ) iinfo = *info;
338
339 #ifdef DEBUG
340 dprint_lu_col("[1]: ", icol, pivrow, xprune, Glu);
341 #endif
342
343 }
344
345 jcol = icol;
346
347 } else { /* Work on one panel of panel_size columns */
348
349 /* Adjust panel_size so that a panel won't overlap with the next
350 * relaxed snode.
351 */
352 panel_size = w_def;
353 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
354 if ( relax_end[k] != EMPTY ) {
355 panel_size = k - jcol;
356 break;
357 }
358 if ( k == min_mn ) panel_size = min_mn - jcol;
359 panel_histo[panel_size]++;
360
361 /* symbolic factor on a panel of columns */
362 dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
363 dense, panel_lsub, segrep, repfnz, xprune,
364 marker, parent, xplore, Glu);
365
366 /* numeric sup-panel updates in topological order */
367 dpanel_bmod(m, panel_size, jcol, nseg1, dense,
368 tempv, segrep, repfnz, Glu, stat);
369
370 /* Sparse LU within the panel, and below panel diagonal */
371 for ( jj = jcol; jj < jcol + panel_size; jj++) {
372 k = (jj - jcol) * m; /* column index for w-wide arrays */
373
374 nseg = nseg1; /* Begin after all the panel segments */
375
376 if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
377 segrep, &repfnz[k], xprune, marker,
378 parent, xplore, Glu)) != 0) return;
379
380 /* Numeric updates */
381 if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
382 tempv, &segrep[nseg1], &repfnz[k],
383 jcol, Glu, stat)) != 0) return;
384
385 /* Copy the U-segments to ucol[*] */
386 if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
387 perm_r, &dense[k], Glu)) != 0)
388 return;
389
390 if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
391 iperm_r, iperm_c, &pivrow, Glu, stat)) )
392 if ( iinfo == 0 ) iinfo = *info;
393
394 /* Prune columns (0:jj-1) using column jj */
395 dpruneL(jj, perm_r, pivrow, nseg, segrep,
396 &repfnz[k], xprune, Glu);
397
398 /* Reset repfnz[] for this column */
399 resetrep_col (nseg, segrep, &repfnz[k]);
400
401 #ifdef DEBUG
402 dprint_lu_col("[2]: ", jj, pivrow, xprune, Glu);
403 #endif
404
405 }
406
407 jcol += panel_size; /* Move to the next panel */
408
409 } /* else */
410
411 } /* for */
412
413 *info = iinfo;
414
415 if ( m > n ) {
416 k = 0;
417 for (i = 0; i < m; ++i)
418 if ( perm_r[i] == EMPTY ) {
419 perm_r[i] = n + k;
420 ++k;
421 }
422 }
423
424 countnz(min_mn, xprune, &nnzL, &nnzU, Glu);
425 fixupL(min_mn, perm_r, Glu);
426
427 dLUWorkFree(iwork, dwork, Glu); /* Free work space and compress storage */
428
429 if ( fact == SamePattern_SameRowPerm ) {
430 /* L and U structures may have changed due to possibly different
431 pivoting, even though the storage is available.
432 There could also be memory expansions, so the array locations
433 may have changed, */
434 ((SCformat *)L->Store)->nnz = nnzL;
435 ((SCformat *)L->Store)->nsuper = Glu->supno[n];
436 ((SCformat *)L->Store)->nzval = (double *) Glu->lusup;
437 ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
438 ((SCformat *)L->Store)->rowind = Glu->lsub;
439 ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
440 ((NCformat *)U->Store)->nnz = nnzU;
441 ((NCformat *)U->Store)->nzval = (double *) Glu->ucol;
442 ((NCformat *)U->Store)->rowind = Glu->usub;
443 ((NCformat *)U->Store)->colptr = Glu->xusub;
444 } else {
445 dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
446 (double *) Glu->lusup, Glu->xlusup,
447 Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
448 SLU_SC, SLU_D, SLU_TRLU);
449 dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
450 (double *) Glu->ucol, Glu->usub, Glu->xusub,
451 SLU_NC, SLU_D, SLU_TRU);
452 }
453
454 ops[FACT] += ops[TRSV] + ops[GEMV];
455 stat->expansions = --(Glu->num_expansions);
456
457 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
458 SUPERLU_FREE (iperm_c);
459 SUPERLU_FREE (relax_end);
460
461 }
462