1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5
6 All rights reserved.
7
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11
12 /*! @file sgsitrf.c
13 * \brief Computes an ILU factorization of a general sparse matrix
14 *
15 * <pre>
16 * -- SuperLU routine (version 4.1) --
17 * Lawrence Berkeley National Laboratory.
18 * June 30, 2009
19 *
20 * </pre>
21 */
22
23 #include "slu_sdefs.h"
24 #include "supermatrix.h"
25 #include "slu_util.h"
26
27 #ifdef DEBUG
28 int num_drop_L;
29 #endif
30
31 /*! \brief
32 *
33 * <pre>
34 * Purpose
35 * =======
36 *
37 * SGSITRF computes an ILU factorization of a general sparse m-by-n
38 * matrix A using partial pivoting with row interchanges.
39 * The factorization has the form
40 * Pr * A = L * U
41 * where Pr is a row permutation matrix, L is lower triangular with unit
42 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
43 * triangular (upper trapezoidal if A->nrow < A->ncol).
44 *
45 * See supermatrix.h for the definition of 'SuperMatrix' structure.
46 *
47 * Arguments
48 * =========
49 *
50 * options (input) superlu_options_t*
51 * The structure defines the input parameters to control
52 * how the ILU decomposition will be performed.
53 *
54 * A (input) SuperMatrix*
55 * Original matrix A, permuted by columns, of dimension
56 * (A->nrow, A->ncol). The type of A can be:
57 * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
58 *
59 * relax (input) int
60 * To control degree of relaxing supernodes. If the number
61 * of nodes (columns) in a subtree of the elimination tree is less
62 * than relax, this subtree is considered as one supernode,
63 * regardless of the row structures of those columns.
64 *
65 * panel_size (input) int
66 * A panel consists of at most panel_size consecutive columns.
67 *
68 * etree (input) int*, dimension (A->ncol)
69 * Elimination tree of A'*A.
70 * Note: etree is a vector of parent pointers for a forest whose
71 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
72 * On input, the columns of A should be permuted so that the
73 * etree is in a certain postorder.
74 *
75 * work (input/output) void*, size (lwork) (in bytes)
76 * User-supplied work space and space for the output data structures.
77 * Not referenced if lwork = 0;
78 *
79 * lwork (input) int
80 * Specifies the size of work array in bytes.
81 * = 0: allocate space internally by system malloc;
82 * > 0: use user-supplied work array of length lwork in bytes,
83 * returns error if space runs out.
84 * = -1: the routine guesses the amount of space needed without
85 * performing the factorization, and returns it in
86 * *info; no other side effects.
87 *
88 * perm_c (input) int*, dimension (A->ncol)
89 * Column permutation vector, which defines the
90 * permutation matrix Pc; perm_c[i] = j means column i of A is
91 * in position j in A*Pc.
92 * When searching for diagonal, perm_c[*] is applied to the
93 * row subscripts of A, so that diagonal threshold pivoting
94 * can find the diagonal of A, rather than that of A*Pc.
95 *
96 * perm_r (input/output) int*, dimension (A->nrow)
97 * Row permutation vector which defines the permutation matrix Pr,
98 * perm_r[i] = j means row i of A is in position j in Pr*A.
99 * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
100 * will try to use the input perm_r, unless a certain threshold
101 * criterion is violated. In that case, perm_r is overwritten by
102 * a new permutation determined by partial pivoting or diagonal
103 * threshold pivoting.
104 * Otherwise, perm_r is output argument;
105 *
106 * L (output) SuperMatrix*
107 * The factor L from the factorization Pr*A=L*U; use compressed row
108 * subscripts storage for supernodes, i.e., L has type:
109 * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
110 *
111 * U (output) SuperMatrix*
112 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
113 * storage scheme, i.e., U has types: Stype = SLU_NC,
114 * Dtype = SLU_S, Mtype = SLU_TRU.
115 *
116 * Glu (input/output) GlobalLU_t *
117 * If options->Fact == SamePattern_SameRowPerm, it is an input;
118 * The matrix A will be factorized assuming that a
119 * factorization of a matrix with the same sparsity pattern
120 * and similar numerical values was performed prior to this one.
121 * Therefore, this factorization will reuse both row and column
122 * scaling factors R and C, both row and column permutation
123 * vectors perm_r and perm_c, and the L & U data structures
124 * set up from the previous factorization.
125 * Otherwise, it is an output.
126 *
127 * stat (output) SuperLUStat_t*
128 * Record the statistics on runtime and floating-point operation count.
129 * See slu_util.h for the definition of 'SuperLUStat_t'.
130 *
131 * info (output) int*
132 * = 0: successful exit
133 * < 0: if info = -i, the i-th argument had an illegal value
134 * > 0: if info = i, and i is
135 * <= A->ncol: number of zero pivots. They are replaced by small
136 * entries according to options->ILU_FillTol.
137 * > A->ncol: number of bytes allocated when memory allocation
138 * failure occurred, plus A->ncol. If lwork = -1, it is
139 * the estimated amount of space needed, plus A->ncol.
140 *
141 * ======================================================================
142 *
143 * Local Working Arrays:
144 * ======================
145 * m = number of rows in the matrix
146 * n = number of columns in the matrix
147 *
148 * marker[0:3*m-1]: marker[i] = j means that node i has been
149 * reached when working on column j.
150 * Storage: relative to original row subscripts
151 * NOTE: There are 4 of them:
152 * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
153 * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
154 * marker_relax(has its own space) is used for relaxed supernodes.
155 *
156 * parent[0:m-1]: parent vector used during dfs
157 * Storage: relative to new row subscripts
158 *
159 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
160 * unexplored neighbor of i in lsub[*]
161 *
162 * segrep[0:nseg-1]: contains the list of supernodal representatives
163 * in topological order of the dfs. A supernode representative is the
164 * last column of a supernode.
165 * The maximum size of segrep[] is n.
166 *
167 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
168 * supernodal representative r, repfnz[r] is the location of the first
169 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
170 * indicates the supernode r has been explored.
171 * NOTE: There are W of them, each used for one column of a panel.
172 *
173 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
174 * the panel diagonal. These are filled in during dpanel_dfs(), and are
175 * used later in the inner LU factorization within the panel.
176 * panel_lsub[]/dense[] pair forms the SPA data structure.
177 * NOTE: There are W of them.
178 *
179 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
180 * NOTE: there are W of them.
181 *
182 * tempv[0:*]: real temporary used for dense numeric kernels;
183 * The size of this array is defined by NUM_TEMPV() in slu_util.h.
184 * It is also used by the dropping routine ilu_ddrop_row().
185 * </pre>
186 */
187
188 void
sgsitrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)189 sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
190 int *etree, void *work, int lwork, int *perm_c, int *perm_r,
191 SuperMatrix *L, SuperMatrix *U,
192 GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
193 SuperLUStat_t *stat, int *info)
194 {
195 /* Local working arrays */
196 NCPformat *Astore;
197 int *iperm_r = NULL; /* inverse of perm_r; used when
198 options->Fact == SamePattern_SameRowPerm */
199 int *iperm_c; /* inverse of perm_c */
200 int *swap, *iswap; /* swap is used to store the row permutation
201 during the factorization. Initially, it is set
202 to iperm_c (row indeces of Pc*A*Pc').
203 iswap is the inverse of swap. After the
204 factorization, it is equal to perm_r. */
205 int *iwork;
206 float *swork;
207 int *segrep, *repfnz, *parent, *xplore;
208 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
209 int *marker, *marker_relax;
210 float *dense, *tempv;
211 int *relax_end, *relax_fsupc;
212 float *a;
213 int *asub;
214 int *xa_begin, *xa_end;
215 int *xsup, *supno;
216 int *xlsub, *xlusup, *xusub;
217 int nzlumax;
218 float *amax;
219 float drop_sum;
220 float alpha, omega; /* used in MILU, mimicing DRIC */
221 float *swork2; /* used by the second dropping rule */
222
223 /* Local scalars */
224 fact_t fact = options->Fact;
225 double diag_pivot_thresh = options->DiagPivotThresh;
226 double drop_tol = options->ILU_DropTol; /* tau */
227 double fill_ini = options->ILU_FillTol; /* tau^hat */
228 double gamma = options->ILU_FillFactor;
229 int drop_rule = options->ILU_DropRule;
230 milu_t milu = options->ILU_MILU;
231 double fill_tol;
232 int pivrow; /* pivotal row number in the original matrix A */
233 int nseg1; /* no of segments in U-column above panel row jcol */
234 int nseg; /* no of segments in each U-column */
235 register int jcol;
236 register int kcol; /* end column of a relaxed snode */
237 register int icol;
238 register int i, k, jj, new_next, iinfo;
239 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
240 int w_def; /* upper bound on panel width */
241 int usepr, iperm_r_allocated = 0;
242 int nnzL, nnzU;
243 int *panel_histo = stat->panel_histo;
244 flops_t *ops = stat->ops;
245
246 int last_drop;/* the last column which the dropping rules applied */
247 int quota;
248 int nnzAj; /* number of nonzeros in A(:,1:j) */
249 int nnzLj, nnzUj;
250 double tol_L = drop_tol, tol_U = drop_tol;
251 float zero = 0.0;
252 float one = 1.0;
253
254 /* Executable */
255 iinfo = 0;
256 m = A->nrow;
257 n = A->ncol;
258 min_mn = SUPERLU_MIN(m, n);
259 Astore = A->Store;
260 a = Astore->nzval;
261 asub = Astore->rowind;
262 xa_begin = Astore->colbeg;
263 xa_end = Astore->colend;
264
265 /* Allocate storage common to the factor routines */
266 *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
267 gamma, L, U, Glu, &iwork, &swork);
268 if ( *info ) return;
269
270 xsup = Glu->xsup;
271 supno = Glu->supno;
272 xlsub = Glu->xlsub;
273 xlusup = Glu->xlusup;
274 xusub = Glu->xusub;
275
276 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
277 &repfnz, &panel_lsub, &marker_relax, &marker);
278 sSetRWork(m, panel_size, swork, &dense, &tempv);
279
280 usepr = (fact == SamePattern_SameRowPerm);
281 if ( usepr ) {
282 /* Compute the inverse of perm_r */
283 iperm_r = (int *) intMalloc(m);
284 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
285 iperm_r_allocated = 1;
286 }
287
288 iperm_c = (int *) intMalloc(n);
289 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
290 swap = (int *)intMalloc(n);
291 for (k = 0; k < n; k++) swap[k] = iperm_c[k];
292 iswap = (int *)intMalloc(n);
293 for (k = 0; k < n; k++) iswap[k] = perm_c[k];
294 amax = (float *) floatMalloc(panel_size);
295 if (drop_rule & DROP_SECONDARY)
296 swork2 = (float *)floatMalloc(n);
297 else
298 swork2 = NULL;
299
300 nnzAj = 0;
301 nnzLj = 0;
302 nnzUj = 0;
303 last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
304 alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
305
306 /* Identify relaxed snodes */
307 relax_end = (int *) intMalloc(n);
308 relax_fsupc = (int *) intMalloc(n);
309 if ( options->SymmetricMode == YES )
310 ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
311 else
312 ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
313
314 ifill (perm_r, m, EMPTY);
315 ifill (marker, m * NO_MARKER, EMPTY);
316 supno[0] = -1;
317 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
318 w_def = panel_size;
319
320 /* Mark the rows used by relaxed supernodes */
321 ifill (marker_relax, m, EMPTY);
322 i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
323 asub, marker_relax);
324 #if ( PRNTlevel >= 1)
325 printf("%d relaxed supernodes.\n", i);
326 #endif
327
328 /*
329 * Work on one "panel" at a time. A panel is one of the following:
330 * (a) a relaxed supernode at the bottom of the etree, or
331 * (b) panel_size contiguous columns, defined by the user
332 */
333 for (jcol = 0; jcol < min_mn; ) {
334
335 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
336 kcol = relax_end[jcol]; /* end of the relaxed snode */
337 panel_histo[kcol-jcol+1]++;
338
339 /* Drop small rows in the previous supernode. */
340 if (jcol > 0 && jcol < last_drop) {
341 int first = xsup[supno[jcol - 1]];
342 int last = jcol - 1;
343 int quota;
344
345 /* Compute the quota */
346 if (drop_rule & DROP_PROWS)
347 quota = gamma * Astore->nnz / m * (m - first) / m
348 * (last - first + 1);
349 else if (drop_rule & DROP_COLUMN) {
350 int i;
351 quota = 0;
352 for (i = first; i <= last; i++)
353 quota += xa_end[i] - xa_begin[i];
354 quota = gamma * quota * (m - first) / m;
355 } else if (drop_rule & DROP_AREA)
356 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
357 - nnzLj;
358 else
359 quota = m * n;
360 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
361
362 /* Drop small rows */
363 i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
364 &fill_tol, Glu, tempv, swork2, 0);
365 /* Reset the parameters */
366 if (drop_rule & DROP_DYNAMIC) {
367 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
368 < nnzLj)
369 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
370 else
371 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
372 }
373 if (fill_tol < 0) iinfo -= (int)fill_tol;
374 #ifdef DEBUG
375 num_drop_L += i * (last - first + 1);
376 #endif
377 }
378
379 /* --------------------------------------
380 * Factorize the relaxed supernode(jcol:kcol)
381 * -------------------------------------- */
382 /* Determine the union of the row structure of the snode */
383 if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
384 marker, Glu)) != 0 )
385 return;
386
387 nextu = xusub[jcol];
388 nextlu = xlusup[jcol];
389 jsupno = supno[jcol];
390 fsupc = xsup[jsupno];
391 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
392 nzlumax = Glu->nzlumax;
393 while ( new_next > nzlumax ) {
394 if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
395 return;
396 }
397
398 for (icol = jcol; icol <= kcol; icol++) {
399 xusub[icol+1] = nextu;
400
401 amax[0] = 0.0;
402 /* Scatter into SPA dense[*] */
403 for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
404 register float tmp = fabs(a[k]);
405 if (tmp > amax[0]) amax[0] = tmp;
406 dense[asub[k]] = a[k];
407 }
408 nnzAj += xa_end[icol] - xa_begin[icol];
409 if (amax[0] == 0.0) {
410 amax[0] = fill_ini;
411 #if ( PRNTlevel >= 1)
412 printf("Column %d is entirely zero!\n", icol);
413 fflush(stdout);
414 #endif
415 }
416
417 /* Numeric update within the snode */
418 ssnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
419
420 if (usepr) pivrow = iperm_r[icol];
421 fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
422 if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
423 perm_r, iperm_c[icol], swap, iswap,
424 marker_relax, &pivrow,
425 amax[0] * fill_tol, milu, zero,
426 Glu, stat)) ) {
427 iinfo++;
428 marker[pivrow] = kcol;
429 }
430
431 }
432
433 jcol = kcol + 1;
434
435 } else { /* Work on one panel of panel_size columns */
436
437 /* Adjust panel_size so that a panel won't overlap with the next
438 * relaxed snode.
439 */
440 panel_size = w_def;
441 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
442 if ( relax_end[k] != EMPTY ) {
443 panel_size = k - jcol;
444 break;
445 }
446 if ( k == min_mn ) panel_size = min_mn - jcol;
447 panel_histo[panel_size]++;
448
449 /* symbolic factor on a panel of columns */
450 ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
451 dense, amax, panel_lsub, segrep, repfnz,
452 marker, parent, xplore, Glu);
453
454 /* numeric sup-panel updates in topological order */
455 spanel_bmod(m, panel_size, jcol, nseg1, dense,
456 tempv, segrep, repfnz, Glu, stat);
457
458 /* Sparse LU within the panel, and below panel diagonal */
459 for (jj = jcol; jj < jcol + panel_size; jj++) {
460
461 k = (jj - jcol) * m; /* column index for w-wide arrays */
462
463 nseg = nseg1; /* Begin after all the panel segments */
464
465 nnzAj += xa_end[jj] - xa_begin[jj];
466
467 if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
468 &panel_lsub[k], segrep, &repfnz[k],
469 marker, parent, xplore, Glu)))
470 return;
471
472 /* Numeric updates */
473 if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
474 tempv, &segrep[nseg1], &repfnz[k],
475 jcol, Glu, stat)) != 0) return;
476
477 /* Make a fill-in position if the column is entirely zero */
478 if (xlsub[jj + 1] == xlsub[jj]) {
479 register int i, row;
480 int nextl;
481 int nzlmax = Glu->nzlmax;
482 int *lsub = Glu->lsub;
483 int *marker2 = marker + 2 * m;
484
485 /* Allocate memory */
486 nextl = xlsub[jj] + 1;
487 if (nextl >= nzlmax) {
488 int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, Glu);
489 if (error) { *info = error; return; }
490 lsub = Glu->lsub;
491 }
492 xlsub[jj + 1]++;
493 assert(xlusup[jj]==xlusup[jj+1]);
494 xlusup[jj + 1]++;
495 ((float *) Glu->lusup)[xlusup[jj]] = zero;
496
497 /* Choose a row index (pivrow) for fill-in */
498 for (i = jj; i < n; i++)
499 if (marker_relax[swap[i]] <= jj) break;
500 row = swap[i];
501 marker2[row] = jj;
502 lsub[xlsub[jj]] = row;
503 #ifdef DEBUG
504 printf("Fill col %d.\n", jj);
505 fflush(stdout);
506 #endif
507 }
508
509 /* Computer the quota */
510 if (drop_rule & DROP_PROWS)
511 quota = gamma * Astore->nnz / m * jj / m;
512 else if (drop_rule & DROP_COLUMN)
513 quota = gamma * (xa_end[jj] - xa_begin[jj]) *
514 (jj + 1) / m;
515 else if (drop_rule & DROP_AREA)
516 quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
517 else
518 quota = m;
519
520 /* Copy the U-segments to ucol[*] and drop small entries */
521 if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
522 perm_r, &dense[k], drop_rule,
523 milu, amax[jj - jcol] * tol_U,
524 quota, &drop_sum, &nnzUj, Glu,
525 swork2)) != 0)
526 return;
527
528 /* Reset the dropping threshold if required */
529 if (drop_rule & DROP_DYNAMIC) {
530 if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
531 tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
532 else
533 tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
534 }
535
536 if (drop_sum != zero)
537 {
538 if (drop_sum > zero)
539 omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
540 * amax[jj - jcol] / drop_sum, one);
541 else
542 omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
543 * amax[jj - jcol] / drop_sum, -one);
544 drop_sum *= omega;
545 }
546 if (usepr) pivrow = iperm_r[jj];
547 fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
548 if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
549 iperm_c[jj], swap, iswap,
550 marker_relax, &pivrow,
551 amax[jj - jcol] * fill_tol, milu,
552 drop_sum, Glu, stat)) ) {
553 iinfo++;
554 marker[m + pivrow] = jj;
555 marker[2 * m + pivrow] = jj;
556 }
557
558 /* Reset repfnz[] for this column */
559 resetrep_col (nseg, segrep, &repfnz[k]);
560
561 /* Start a new supernode, drop the previous one */
562 if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
563 int first = xsup[supno[jj - 1]];
564 int last = jj - 1;
565 int quota;
566
567 /* Compute the quota */
568 if (drop_rule & DROP_PROWS)
569 quota = gamma * Astore->nnz / m * (m - first) / m
570 * (last - first + 1);
571 else if (drop_rule & DROP_COLUMN) {
572 int i;
573 quota = 0;
574 for (i = first; i <= last; i++)
575 quota += xa_end[i] - xa_begin[i];
576 quota = gamma * quota * (m - first) / m;
577 } else if (drop_rule & DROP_AREA)
578 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
579 / m) - nnzLj;
580 else
581 quota = m * n;
582 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
583 (double)min_mn);
584
585 /* Drop small rows */
586 i = ilu_sdrop_row(options, first, last, tol_L, quota,
587 &nnzLj, &fill_tol, Glu, tempv, swork2,
588 1);
589
590 /* Reset the parameters */
591 if (drop_rule & DROP_DYNAMIC) {
592 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
593 < nnzLj)
594 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
595 else
596 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
597 }
598 if (fill_tol < 0) iinfo -= (int)fill_tol;
599 #ifdef DEBUG
600 num_drop_L += i * (last - first + 1);
601 #endif
602 } /* if start a new supernode */
603
604 } /* for */
605
606 jcol += panel_size; /* Move to the next panel */
607
608 } /* else */
609
610 } /* for */
611
612 *info = iinfo;
613
614 if ( m > n ) {
615 k = 0;
616 for (i = 0; i < m; ++i)
617 if ( perm_r[i] == EMPTY ) {
618 perm_r[i] = n + k;
619 ++k;
620 }
621 }
622
623 ilu_countnz(min_mn, &nnzL, &nnzU, Glu);
624 fixupL(min_mn, perm_r, Glu);
625
626 sLUWorkFree(iwork, swork, Glu); /* Free work space and compress storage */
627
628 if ( fact == SamePattern_SameRowPerm ) {
629 /* L and U structures may have changed due to possibly different
630 pivoting, even though the storage is available.
631 There could also be memory expansions, so the array locations
632 may have changed, */
633 ((SCformat *)L->Store)->nnz = nnzL;
634 ((SCformat *)L->Store)->nsuper = Glu->supno[n];
635 ((SCformat *)L->Store)->nzval = (float *) Glu->lusup;
636 ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
637 ((SCformat *)L->Store)->rowind = Glu->lsub;
638 ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
639 ((NCformat *)U->Store)->nnz = nnzU;
640 ((NCformat *)U->Store)->nzval = (float *) Glu->ucol;
641 ((NCformat *)U->Store)->rowind = Glu->usub;
642 ((NCformat *)U->Store)->colptr = Glu->xusub;
643 } else {
644 sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
645 (float *) Glu->lusup, Glu->xlusup,
646 Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
647 SLU_SC, SLU_S, SLU_TRLU);
648 sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
649 (float *) Glu->ucol, Glu->usub, Glu->xusub,
650 SLU_NC, SLU_S, SLU_TRU);
651 }
652
653 ops[FACT] += ops[TRSV] + ops[GEMV];
654 stat->expansions = --(Glu->num_expansions);
655
656 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
657 SUPERLU_FREE (iperm_c);
658 SUPERLU_FREE (relax_end);
659 SUPERLU_FREE (swap);
660 SUPERLU_FREE (iswap);
661 SUPERLU_FREE (relax_fsupc);
662 SUPERLU_FREE (amax);
663 if ( swork2 ) SUPERLU_FREE (swork2);
664
665 }
666