1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5
6 All rights reserved.
7
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11
12 /*! @file sgsitrf.c
13 * \brief Computes an ILU factorization of a general sparse matrix
14 *
15 * <pre>
16 * -- SuperLU routine (version 4.1) --
17 * Lawrence Berkeley National Laboratory.
18 * June 30, 2009
19 *
20 * </pre>
21 */
22
23 #include "slu_sdefs.h"
24
25 #ifdef DEBUG
26 int num_drop_L;
27 #endif
28
29 /*! \brief
30 *
31 * <pre>
32 * Purpose
33 * =======
34 *
35 * SGSITRF computes an ILU factorization of a general sparse m-by-n
36 * matrix A using partial pivoting with row interchanges.
37 * The factorization has the form
38 * Pr * A = L * U
39 * where Pr is a row permutation matrix, L is lower triangular with unit
40 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
41 * triangular (upper trapezoidal if A->nrow < A->ncol).
42 *
43 * See supermatrix.h for the definition of 'SuperMatrix' structure.
44 *
45 * Arguments
46 * =========
47 *
48 * options (input) superlu_options_t*
49 * The structure defines the input parameters to control
50 * how the ILU decomposition will be performed.
51 *
52 * A (input) SuperMatrix*
53 * Original matrix A, permuted by columns, of dimension
54 * (A->nrow, A->ncol). The type of A can be:
55 * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
56 *
57 * relax (input) int
58 * To control degree of relaxing supernodes. If the number
59 * of nodes (columns) in a subtree of the elimination tree is less
60 * than relax, this subtree is considered as one supernode,
61 * regardless of the row structures of those columns.
62 *
63 * panel_size (input) int
64 * A panel consists of at most panel_size consecutive columns.
65 *
66 * etree (input) int*, dimension (A->ncol)
67 * Elimination tree of A'*A.
68 * Note: etree is a vector of parent pointers for a forest whose
69 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
70 * On input, the columns of A should be permuted so that the
71 * etree is in a certain postorder.
72 *
73 * work (input/output) void*, size (lwork) (in bytes)
74 * User-supplied work space and space for the output data structures.
75 * Not referenced if lwork = 0;
76 *
77 * lwork (input) int
78 * Specifies the size of work array in bytes.
79 * = 0: allocate space internally by system malloc;
80 * > 0: use user-supplied work array of length lwork in bytes,
81 * returns error if space runs out.
82 * = -1: the routine guesses the amount of space needed without
83 * performing the factorization, and returns it in
84 * *info; no other side effects.
85 *
86 * perm_c (input) int*, dimension (A->ncol)
87 * Column permutation vector, which defines the
88 * permutation matrix Pc; perm_c[i] = j means column i of A is
89 * in position j in A*Pc.
90 * When searching for diagonal, perm_c[*] is applied to the
91 * row subscripts of A, so that diagonal threshold pivoting
92 * can find the diagonal of A, rather than that of A*Pc.
93 *
94 * perm_r (input/output) int*, dimension (A->nrow)
95 * Row permutation vector which defines the permutation matrix Pr,
96 * perm_r[i] = j means row i of A is in position j in Pr*A.
97 * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
98 * will try to use the input perm_r, unless a certain threshold
99 * criterion is violated. In that case, perm_r is overwritten by
100 * a new permutation determined by partial pivoting or diagonal
101 * threshold pivoting.
102 * Otherwise, perm_r is output argument;
103 *
104 * L (output) SuperMatrix*
105 * The factor L from the factorization Pr*A=L*U; use compressed row
106 * subscripts storage for supernodes, i.e., L has type:
107 * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
108 *
109 * U (output) SuperMatrix*
110 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
111 * storage scheme, i.e., U has types: Stype = SLU_NC,
112 * Dtype = SLU_S, Mtype = SLU_TRU.
113 *
114 * Glu (input/output) GlobalLU_t *
115 * If options->Fact == SamePattern_SameRowPerm, it is an input;
116 * The matrix A will be factorized assuming that a
117 * factorization of a matrix with the same sparsity pattern
118 * and similar numerical values was performed prior to this one.
119 * Therefore, this factorization will reuse both row and column
120 * scaling factors R and C, both row and column permutation
121 * vectors perm_r and perm_c, and the L & U data structures
122 * set up from the previous factorization.
123 * Otherwise, it is an output.
124 *
125 * stat (output) SuperLUStat_t*
126 * Record the statistics on runtime and floating-point operation count.
127 * See slu_util.h for the definition of 'SuperLUStat_t'.
128 *
129 * info (output) int*
130 * = 0: successful exit
131 * < 0: if info = -i, the i-th argument had an illegal value
132 * > 0: if info = i, and i is
133 * <= A->ncol: number of zero pivots. They are replaced by small
134 * entries according to options->ILU_FillTol.
135 * > A->ncol: number of bytes allocated when memory allocation
136 * failure occurred, plus A->ncol. If lwork = -1, it is
137 * the estimated amount of space needed, plus A->ncol.
138 *
139 * ======================================================================
140 *
141 * Local Working Arrays:
142 * ======================
143 * m = number of rows in the matrix
144 * n = number of columns in the matrix
145 *
146 * marker[0:3*m-1]: marker[i] = j means that node i has been
147 * reached when working on column j.
148 * Storage: relative to original row subscripts
149 * NOTE: There are 4 of them:
150 * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
151 * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
152 * marker_relax(has its own space) is used for relaxed supernodes.
153 *
154 * parent[0:m-1]: parent vector used during dfs
155 * Storage: relative to new row subscripts
156 *
157 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
158 * unexplored neighbor of i in lsub[*]
159 *
160 * segrep[0:nseg-1]: contains the list of supernodal representatives
161 * in topological order of the dfs. A supernode representative is the
162 * last column of a supernode.
163 * The maximum size of segrep[] is n.
164 *
165 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
166 * supernodal representative r, repfnz[r] is the location of the first
167 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
168 * indicates the supernode r has been explored.
169 * NOTE: There are W of them, each used for one column of a panel.
170 *
171 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
172 * the panel diagonal. These are filled in during dpanel_dfs(), and are
173 * used later in the inner LU factorization within the panel.
174 * panel_lsub[]/dense[] pair forms the SPA data structure.
175 * NOTE: There are W of them.
176 *
177 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
178 * NOTE: there are W of them.
179 *
180 * tempv[0:*]: real temporary used for dense numeric kernels;
181 * The size of this array is defined by NUM_TEMPV() in slu_util.h.
182 * It is also used by the dropping routine ilu_ddrop_row().
183 * </pre>
184 */
185
186 void
sgsitrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)187 sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
188 int *etree, void *work, int lwork, int *perm_c, int *perm_r,
189 SuperMatrix *L, SuperMatrix *U,
190 GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
191 SuperLUStat_t *stat, int *info)
192 {
193 /* Local working arrays */
194 NCPformat *Astore;
195 int *iperm_r = NULL; /* inverse of perm_r; used when
196 options->Fact == SamePattern_SameRowPerm */
197 int *iperm_c; /* inverse of perm_c */
198 int *swap, *iswap; /* swap is used to store the row permutation
199 during the factorization. Initially, it is set
200 to iperm_c (row indeces of Pc*A*Pc').
201 iswap is the inverse of swap. After the
202 factorization, it is equal to perm_r. */
203 int *iwork;
204 float *swork;
205 int *segrep, *repfnz, *parent, *xplore;
206 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
207 int *marker, *marker_relax;
208 float *dense, *tempv;
209 int *relax_end, *relax_fsupc;
210 float *a;
211 int *asub;
212 int *xa_begin, *xa_end;
213 int *xsup, *supno;
214 int *xlsub, *xlusup, *xusub;
215 int nzlumax;
216 float *amax;
217 float drop_sum;
218 float alpha, omega; /* used in MILU, mimicing DRIC */
219 float *swork2; /* used by the second dropping rule */
220
221 /* Local scalars */
222 fact_t fact = options->Fact;
223 double diag_pivot_thresh = options->DiagPivotThresh;
224 double drop_tol = options->ILU_DropTol; /* tau */
225 double fill_ini = options->ILU_FillTol; /* tau^hat */
226 double gamma = options->ILU_FillFactor;
227 int drop_rule = options->ILU_DropRule;
228 milu_t milu = options->ILU_MILU;
229 double fill_tol;
230 int pivrow; /* pivotal row number in the original matrix A */
231 int nseg1; /* no of segments in U-column above panel row jcol */
232 int nseg; /* no of segments in each U-column */
233 register int jcol;
234 register int kcol; /* end column of a relaxed snode */
235 register int icol;
236 register int i, k, jj, new_next, iinfo;
237 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
238 int w_def; /* upper bound on panel width */
239 int usepr, iperm_r_allocated = 0;
240 int nnzL, nnzU;
241 int *panel_histo = stat->panel_histo;
242 flops_t *ops = stat->ops;
243
244 int last_drop;/* the last column which the dropping rules applied */
245 int quota;
246 int nnzAj; /* number of nonzeros in A(:,1:j) */
247 int nnzLj, nnzUj;
248 double tol_L = drop_tol, tol_U = drop_tol;
249 float zero = 0.0;
250 float one = 1.0;
251
252 /* Executable */
253 iinfo = 0;
254 m = A->nrow;
255 n = A->ncol;
256 min_mn = SUPERLU_MIN(m, n);
257 Astore = A->Store;
258 a = Astore->nzval;
259 asub = Astore->rowind;
260 xa_begin = Astore->colbeg;
261 xa_end = Astore->colend;
262
263 /* Allocate storage common to the factor routines */
264 *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
265 gamma, L, U, Glu, &iwork, &swork);
266 if ( *info ) return;
267
268 xsup = Glu->xsup;
269 supno = Glu->supno;
270 xlsub = Glu->xlsub;
271 xlusup = Glu->xlusup;
272 xusub = Glu->xusub;
273
274 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
275 &repfnz, &panel_lsub, &marker_relax, &marker);
276 sSetRWork(m, panel_size, swork, &dense, &tempv);
277
278 usepr = (fact == SamePattern_SameRowPerm);
279 if ( usepr ) {
280 /* Compute the inverse of perm_r */
281 iperm_r = (int *) intMalloc(m);
282 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
283 iperm_r_allocated = 1;
284 }
285
286 iperm_c = (int *) intMalloc(n);
287 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
288 swap = (int *)intMalloc(n);
289 for (k = 0; k < n; k++) swap[k] = iperm_c[k];
290 iswap = (int *)intMalloc(n);
291 for (k = 0; k < n; k++) iswap[k] = perm_c[k];
292 amax = (float *) SUPERLU_MALLOC(panel_size * sizeof(float));
293 if (drop_rule & DROP_SECONDARY)
294 swork2 = SUPERLU_MALLOC(n * sizeof(float));
295 else
296 swork2 = NULL;
297
298 nnzAj = 0;
299 nnzLj = 0;
300 nnzUj = 0;
301 last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
302 alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
303
304 /* Identify relaxed snodes */
305 relax_end = (int *) intMalloc(n);
306 relax_fsupc = (int *) intMalloc(n);
307 if ( options->SymmetricMode == YES )
308 ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
309 else
310 ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
311
312 ifill (perm_r, m, EMPTY);
313 ifill (marker, m * NO_MARKER, EMPTY);
314 supno[0] = -1;
315 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
316 w_def = panel_size;
317
318 /* Mark the rows used by relaxed supernodes */
319 ifill (marker_relax, m, EMPTY);
320 i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
321 asub, marker_relax);
322 #if ( PRNTlevel >= 1)
323 printf("%d relaxed supernodes.\n", i);
324 #endif
325
326 /*
327 * Work on one "panel" at a time. A panel is one of the following:
328 * (a) a relaxed supernode at the bottom of the etree, or
329 * (b) panel_size contiguous columns, defined by the user
330 */
331 for (jcol = 0; jcol < min_mn; ) {
332
333 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
334 kcol = relax_end[jcol]; /* end of the relaxed snode */
335 panel_histo[kcol-jcol+1]++;
336
337 /* Drop small rows in the previous supernode. */
338 if (jcol > 0 && jcol < last_drop) {
339 int first = xsup[supno[jcol - 1]];
340 int last = jcol - 1;
341 int quota;
342
343 /* Compute the quota */
344 if (drop_rule & DROP_PROWS)
345 quota = gamma * Astore->nnz / m * (m - first) / m
346 * (last - first + 1);
347 else if (drop_rule & DROP_COLUMN) {
348 int i;
349 quota = 0;
350 for (i = first; i <= last; i++)
351 quota += xa_end[i] - xa_begin[i];
352 quota = gamma * quota * (m - first) / m;
353 } else if (drop_rule & DROP_AREA)
354 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
355 - nnzLj;
356 else
357 quota = m * n;
358 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
359
360 /* Drop small rows */
361 i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
362 &fill_tol, Glu, tempv, swork2, 0);
363 /* Reset the parameters */
364 if (drop_rule & DROP_DYNAMIC) {
365 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
366 < nnzLj)
367 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
368 else
369 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
370 }
371 if (fill_tol < 0) iinfo -= (int)fill_tol;
372 #ifdef DEBUG
373 num_drop_L += i * (last - first + 1);
374 #endif
375 }
376
377 /* --------------------------------------
378 * Factorize the relaxed supernode(jcol:kcol)
379 * -------------------------------------- */
380 /* Determine the union of the row structure of the snode */
381 if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
382 marker, Glu)) != 0 )
383 return;
384
385 nextu = xusub[jcol];
386 nextlu = xlusup[jcol];
387 jsupno = supno[jcol];
388 fsupc = xsup[jsupno];
389 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
390 nzlumax = Glu->nzlumax;
391 while ( new_next > nzlumax ) {
392 if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
393 return;
394 }
395
396 for (icol = jcol; icol <= kcol; icol++) {
397 xusub[icol+1] = nextu;
398
399 amax[0] = 0.0;
400 /* Scatter into SPA dense[*] */
401 for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
402 register float tmp = fabs(a[k]);
403 if (tmp > amax[0]) amax[0] = tmp;
404 dense[asub[k]] = a[k];
405 }
406 nnzAj += xa_end[icol] - xa_begin[icol];
407 if (amax[0] == 0.0) {
408 amax[0] = fill_ini;
409 #if ( PRNTlevel >= 1)
410 printf("Column %d is entirely zero!\n", icol);
411 fflush(stdout);
412 #endif
413 }
414
415 /* Numeric update within the snode */
416 ssnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
417
418 if (usepr) pivrow = iperm_r[icol];
419 fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
420 if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
421 perm_r, iperm_c[icol], swap, iswap,
422 marker_relax, &pivrow,
423 amax[0] * fill_tol, milu, zero,
424 Glu, stat)) ) {
425 iinfo++;
426 marker[pivrow] = kcol;
427 }
428
429 }
430
431 jcol = kcol + 1;
432
433 } else { /* Work on one panel of panel_size columns */
434
435 /* Adjust panel_size so that a panel won't overlap with the next
436 * relaxed snode.
437 */
438 panel_size = w_def;
439 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
440 if ( relax_end[k] != EMPTY ) {
441 panel_size = k - jcol;
442 break;
443 }
444 if ( k == min_mn ) panel_size = min_mn - jcol;
445 panel_histo[panel_size]++;
446
447 /* symbolic factor on a panel of columns */
448 ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
449 dense, amax, panel_lsub, segrep, repfnz,
450 marker, parent, xplore, Glu);
451
452 /* numeric sup-panel updates in topological order */
453 spanel_bmod(m, panel_size, jcol, nseg1, dense,
454 tempv, segrep, repfnz, Glu, stat);
455
456 /* Sparse LU within the panel, and below panel diagonal */
457 for (jj = jcol; jj < jcol + panel_size; jj++) {
458
459 k = (jj - jcol) * m; /* column index for w-wide arrays */
460
461 nseg = nseg1; /* Begin after all the panel segments */
462
463 nnzAj += xa_end[jj] - xa_begin[jj];
464
465 if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
466 &panel_lsub[k], segrep, &repfnz[k],
467 marker, parent, xplore, Glu)))
468 return;
469
470 /* Numeric updates */
471 if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
472 tempv, &segrep[nseg1], &repfnz[k],
473 jcol, Glu, stat)) != 0) return;
474
475 /* Make a fill-in position if the column is entirely zero */
476 if (xlsub[jj + 1] == xlsub[jj]) {
477 register int i, row;
478 int nextl;
479 int nzlmax = Glu->nzlmax;
480 int *lsub = Glu->lsub;
481 int *marker2 = marker + 2 * m;
482
483 /* Allocate memory */
484 nextl = xlsub[jj] + 1;
485 if (nextl >= nzlmax) {
486 int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, Glu);
487 if (error) { *info = error; return; }
488 lsub = Glu->lsub;
489 }
490 xlsub[jj + 1]++;
491 assert(xlusup[jj]==xlusup[jj+1]);
492 xlusup[jj + 1]++;
493 ((float *) Glu->lusup)[xlusup[jj]] = zero;
494
495 /* Choose a row index (pivrow) for fill-in */
496 for (i = jj; i < n; i++)
497 if (marker_relax[swap[i]] <= jj) break;
498 row = swap[i];
499 marker2[row] = jj;
500 lsub[xlsub[jj]] = row;
501 #ifdef DEBUG
502 printf("Fill col %d.\n", jj);
503 fflush(stdout);
504 #endif
505 }
506
507 /* Computer the quota */
508 if (drop_rule & DROP_PROWS)
509 quota = gamma * Astore->nnz / m * jj / m;
510 else if (drop_rule & DROP_COLUMN)
511 quota = gamma * (xa_end[jj] - xa_begin[jj]) *
512 (jj + 1) / m;
513 else if (drop_rule & DROP_AREA)
514 quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
515 else
516 quota = m;
517
518 /* Copy the U-segments to ucol[*] and drop small entries */
519 if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
520 perm_r, &dense[k], drop_rule,
521 milu, amax[jj - jcol] * tol_U,
522 quota, &drop_sum, &nnzUj, Glu,
523 swork2)) != 0)
524 return;
525
526 /* Reset the dropping threshold if required */
527 if (drop_rule & DROP_DYNAMIC) {
528 if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
529 tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
530 else
531 tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
532 }
533
534 if (drop_sum != zero)
535 {
536 if (drop_sum > zero)
537 omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
538 * amax[jj - jcol] / drop_sum, one);
539 else
540 omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
541 * amax[jj - jcol] / drop_sum, -one);
542 drop_sum *= omega;
543 }
544 if (usepr) pivrow = iperm_r[jj];
545 fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
546 if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
547 iperm_c[jj], swap, iswap,
548 marker_relax, &pivrow,
549 amax[jj - jcol] * fill_tol, milu,
550 drop_sum, Glu, stat)) ) {
551 iinfo++;
552 marker[m + pivrow] = jj;
553 marker[2 * m + pivrow] = jj;
554 }
555
556 /* Reset repfnz[] for this column */
557 resetrep_col (nseg, segrep, &repfnz[k]);
558
559 /* Start a new supernode, drop the previous one */
560 if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
561 int first = xsup[supno[jj - 1]];
562 int last = jj - 1;
563 int quota;
564
565 /* Compute the quota */
566 if (drop_rule & DROP_PROWS)
567 quota = gamma * Astore->nnz / m * (m - first) / m
568 * (last - first + 1);
569 else if (drop_rule & DROP_COLUMN) {
570 int i;
571 quota = 0;
572 for (i = first; i <= last; i++)
573 quota += xa_end[i] - xa_begin[i];
574 quota = gamma * quota * (m - first) / m;
575 } else if (drop_rule & DROP_AREA)
576 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
577 / m) - nnzLj;
578 else
579 quota = m * n;
580 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
581 (double)min_mn);
582
583 /* Drop small rows */
584 i = ilu_sdrop_row(options, first, last, tol_L, quota,
585 &nnzLj, &fill_tol, Glu, tempv, swork2,
586 1);
587
588 /* Reset the parameters */
589 if (drop_rule & DROP_DYNAMIC) {
590 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
591 < nnzLj)
592 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
593 else
594 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
595 }
596 if (fill_tol < 0) iinfo -= (int)fill_tol;
597 #ifdef DEBUG
598 num_drop_L += i * (last - first + 1);
599 #endif
600 } /* if start a new supernode */
601
602 } /* for */
603
604 jcol += panel_size; /* Move to the next panel */
605
606 } /* else */
607
608 } /* for */
609
610 *info = iinfo;
611
612 if ( m > n ) {
613 k = 0;
614 for (i = 0; i < m; ++i)
615 if ( perm_r[i] == EMPTY ) {
616 perm_r[i] = n + k;
617 ++k;
618 }
619 }
620
621 ilu_countnz(min_mn, &nnzL, &nnzU, Glu);
622 fixupL(min_mn, perm_r, Glu);
623
624 sLUWorkFree(iwork, swork, Glu); /* Free work space and compress storage */
625
626 if ( fact == SamePattern_SameRowPerm ) {
627 /* L and U structures may have changed due to possibly different
628 pivoting, even though the storage is available.
629 There could also be memory expansions, so the array locations
630 may have changed, */
631 ((SCformat *)L->Store)->nnz = nnzL;
632 ((SCformat *)L->Store)->nsuper = Glu->supno[n];
633 ((SCformat *)L->Store)->nzval = (float *) Glu->lusup;
634 ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
635 ((SCformat *)L->Store)->rowind = Glu->lsub;
636 ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
637 ((NCformat *)U->Store)->nnz = nnzU;
638 ((NCformat *)U->Store)->nzval = (float *) Glu->ucol;
639 ((NCformat *)U->Store)->rowind = Glu->usub;
640 ((NCformat *)U->Store)->colptr = Glu->xusub;
641 } else {
642 sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
643 (float *) Glu->lusup, Glu->xlusup,
644 Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
645 SLU_SC, SLU_S, SLU_TRLU);
646 sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
647 (float *) Glu->ucol, Glu->usub, Glu->xusub,
648 SLU_NC, SLU_S, SLU_TRU);
649 }
650
651 ops[FACT] += ops[TRSV] + ops[GEMV];
652 stat->expansions = --(Glu->num_expansions);
653
654 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
655 SUPERLU_FREE (iperm_c);
656 SUPERLU_FREE (relax_end);
657 SUPERLU_FREE (swap);
658 SUPERLU_FREE (iswap);
659 SUPERLU_FREE (relax_fsupc);
660 SUPERLU_FREE (amax);
661 if ( swork2 ) SUPERLU_FREE (swork2);
662
663 }
664