1
2 /*! @file cgsitf.c
3 * \brief Computes an ILU factorization of a general sparse matrix
4 *
5 * <pre>
6 * -- SuperLU routine (version 4.1) --
7 * Lawrence Berkeley National Laboratory.
8 * June 30, 2009
9 * </pre>
10 */
11
12 #include "slu_cdefs.h"
13
14 #ifdef DEBUG
15 int num_drop_L;
16 #endif
17
18 /*! \brief
19 *
20 * <pre>
21 * Purpose
22 * =======
23 *
24 * CGSITRF computes an ILU factorization of a general sparse m-by-n
25 * matrix A using partial pivoting with row interchanges.
26 * The factorization has the form
27 * Pr * A = L * U
28 * where Pr is a row permutation matrix, L is lower triangular with unit
29 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
30 * triangular (upper trapezoidal if A->nrow < A->ncol).
31 *
32 * See supermatrix.h for the definition of 'SuperMatrix' structure.
33 *
34 * Arguments
35 * =========
36 *
37 * options (input) superlu_options_t*
38 * The structure defines the input parameters to control
39 * how the ILU decomposition will be performed.
40 *
41 * A (input) SuperMatrix*
42 * Original matrix A, permuted by columns, of dimension
43 * (A->nrow, A->ncol). The type of A can be:
44 * Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
45 *
46 * relax (input) int
47 * To control degree of relaxing supernodes. If the number
48 * of nodes (columns) in a subtree of the elimination tree is less
49 * than relax, this subtree is considered as one supernode,
50 * regardless of the row structures of those columns.
51 *
52 * panel_size (input) int
53 * A panel consists of at most panel_size consecutive columns.
54 *
55 * etree (input) int*, dimension (A->ncol)
56 * Elimination tree of A'*A.
57 * Note: etree is a vector of parent pointers for a forest whose
58 * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
59 * On input, the columns of A should be permuted so that the
60 * etree is in a certain postorder.
61 *
62 * work (input/output) void*, size (lwork) (in bytes)
63 * User-supplied work space and space for the output data structures.
64 * Not referenced if lwork = 0;
65 *
66 * lwork (input) int
67 * Specifies the size of work array in bytes.
68 * = 0: allocate space internally by system malloc;
69 * > 0: use user-supplied work array of length lwork in bytes,
70 * returns error if space runs out.
71 * = -1: the routine guesses the amount of space needed without
72 * performing the factorization, and returns it in
73 * *info; no other side effects.
74 *
75 * perm_c (input) int*, dimension (A->ncol)
76 * Column permutation vector, which defines the
77 * permutation matrix Pc; perm_c[i] = j means column i of A is
78 * in position j in A*Pc.
79 * When searching for diagonal, perm_c[*] is applied to the
80 * row subscripts of A, so that diagonal threshold pivoting
81 * can find the diagonal of A, rather than that of A*Pc.
82 *
83 * perm_r (input/output) int*, dimension (A->nrow)
84 * Row permutation vector which defines the permutation matrix Pr,
85 * perm_r[i] = j means row i of A is in position j in Pr*A.
86 * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
87 * will try to use the input perm_r, unless a certain threshold
88 * criterion is violated. In that case, perm_r is overwritten by
89 * a new permutation determined by partial pivoting or diagonal
90 * threshold pivoting.
91 * Otherwise, perm_r is output argument;
92 *
93 * L (output) SuperMatrix*
94 * The factor L from the factorization Pr*A=L*U; use compressed row
95 * subscripts storage for supernodes, i.e., L has type:
96 * Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
97 *
98 * U (output) SuperMatrix*
99 * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
100 * storage scheme, i.e., U has types: Stype = SLU_NC,
101 * Dtype = SLU_C, Mtype = SLU_TRU.
102 *
103 * stat (output) SuperLUStat_t*
104 * Record the statistics on runtime and floating-point operation count.
105 * See slu_util.h for the definition of 'SuperLUStat_t'.
106 *
107 * info (output) int*
108 * = 0: successful exit
109 * < 0: if info = -i, the i-th argument had an illegal value
110 * > 0: if info = i, and i is
111 * <= A->ncol: number of zero pivots. They are replaced by small
112 * entries according to options->ILU_FillTol.
113 * > A->ncol: number of bytes allocated when memory allocation
114 * failure occurred, plus A->ncol. If lwork = -1, it is
115 * the estimated amount of space needed, plus A->ncol.
116 *
117 * ======================================================================
118 *
119 * Local Working Arrays:
120 * ======================
121 * m = number of rows in the matrix
122 * n = number of columns in the matrix
123 *
124 * marker[0:3*m-1]: marker[i] = j means that node i has been
125 * reached when working on column j.
126 * Storage: relative to original row subscripts
127 * NOTE: There are 4 of them:
128 * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
129 * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
130 * marker_relax(has its own space) is used for relaxed supernodes.
131 *
132 * parent[0:m-1]: parent vector used during dfs
133 * Storage: relative to new row subscripts
134 *
135 * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
136 * unexplored neighbor of i in lsub[*]
137 *
138 * segrep[0:nseg-1]: contains the list of supernodal representatives
139 * in topological order of the dfs. A supernode representative is the
140 * last column of a supernode.
141 * The maximum size of segrep[] is n.
142 *
143 * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
144 * supernodal representative r, repfnz[r] is the location of the first
145 * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
146 * indicates the supernode r has been explored.
147 * NOTE: There are W of them, each used for one column of a panel.
148 *
149 * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
150 * the panel diagonal. These are filled in during dpanel_dfs(), and are
151 * used later in the inner LU factorization within the panel.
152 * panel_lsub[]/dense[] pair forms the SPA data structure.
153 * NOTE: There are W of them.
154 *
155 * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
156 * NOTE: there are W of them.
157 *
158 * tempv[0:*]: real temporary used for dense numeric kernels;
159 * The size of this array is defined by NUM_TEMPV() in slu_util.h.
160 * It is also used by the dropping routine ilu_ddrop_row().
161 * </pre>
162 */
163
164 void
cgsitrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,SuperLUStat_t * stat,int * info)165 cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
166 int *etree, void *work, int lwork, int *perm_c, int *perm_r,
167 SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
168 {
169 /* Local working arrays */
170 NCPformat *Astore;
171 int *iperm_r = NULL; /* inverse of perm_r; used when
172 options->Fact == SamePattern_SameRowPerm */
173 int *iperm_c; /* inverse of perm_c */
174 int *swap, *iswap; /* swap is used to store the row permutation
175 during the factorization. Initially, it is set
176 to iperm_c (row indeces of Pc*A*Pc').
177 iswap is the inverse of swap. After the
178 factorization, it is equal to perm_r. */
179 int *iwork;
180 complex *cwork;
181 int *segrep, *repfnz, *parent, *xplore;
182 int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
183 int *marker, *marker_relax;
184 complex *dense, *tempv;
185 float *stempv;
186 int *relax_end, *relax_fsupc;
187 complex *a;
188 int *asub;
189 int *xa_begin, *xa_end;
190 int *xsup, *supno;
191 int *xlsub, *xlusup, *xusub;
192 int nzlumax;
193 float *amax;
194 complex drop_sum;
195 float alpha, omega; /* used in MILU, mimicing DRIC */
196 static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
197 float *swork2; /* used by the second dropping rule */
198
199 /* Local scalars */
200 fact_t fact = options->Fact;
201 double diag_pivot_thresh = options->DiagPivotThresh;
202 double drop_tol = options->ILU_DropTol; /* tau */
203 double fill_ini = options->ILU_FillTol; /* tau^hat */
204 double gamma = options->ILU_FillFactor;
205 int drop_rule = options->ILU_DropRule;
206 milu_t milu = options->ILU_MILU;
207 double fill_tol;
208 int pivrow; /* pivotal row number in the original matrix A */
209 int nseg1; /* no of segments in U-column above panel row jcol */
210 int nseg; /* no of segments in each U-column */
211 register int jcol;
212 register int kcol; /* end column of a relaxed snode */
213 register int icol;
214 register int i, k, jj, new_next, iinfo;
215 int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
216 int w_def; /* upper bound on panel width */
217 int usepr, iperm_r_allocated = 0;
218 int nnzL, nnzU;
219 int *panel_histo = stat->panel_histo;
220 flops_t *ops = stat->ops;
221
222 int last_drop;/* the last column which the dropping rules applied */
223 int quota;
224 int nnzAj; /* number of nonzeros in A(:,1:j) */
225 int nnzLj, nnzUj;
226 double tol_L = drop_tol, tol_U = drop_tol;
227 complex zero = {0.0, 0.0};
228 float one = 1.0;
229
230 /* Executable */
231 iinfo = 0;
232 m = A->nrow;
233 n = A->ncol;
234 min_mn = SUPERLU_MIN(m, n);
235 Astore = A->Store;
236 a = Astore->nzval;
237 asub = Astore->rowind;
238 xa_begin = Astore->colbeg;
239 xa_end = Astore->colend;
240
241 /* Allocate storage common to the factor routines */
242 *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
243 gamma, L, U, &Glu, &iwork, &cwork);
244 if ( *info ) return;
245
246 xsup = Glu.xsup;
247 supno = Glu.supno;
248 xlsub = Glu.xlsub;
249 xlusup = Glu.xlusup;
250 xusub = Glu.xusub;
251
252 SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
253 &repfnz, &panel_lsub, &marker_relax, &marker);
254 cSetRWork(m, panel_size, cwork, &dense, &tempv);
255
256 usepr = (fact == SamePattern_SameRowPerm);
257 if ( usepr ) {
258 /* Compute the inverse of perm_r */
259 iperm_r = (int *) intMalloc(m);
260 for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
261 iperm_r_allocated = 1;
262 }
263
264 iperm_c = (int *) intMalloc(n);
265 for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
266 swap = (int *)intMalloc(n);
267 for (k = 0; k < n; k++) swap[k] = iperm_c[k];
268 iswap = (int *)intMalloc(n);
269 for (k = 0; k < n; k++) iswap[k] = perm_c[k];
270 amax = (float *) floatMalloc(panel_size);
271 if (drop_rule & DROP_SECONDARY)
272 swork2 = (float *)floatMalloc(n);
273 else
274 swork2 = NULL;
275
276 nnzAj = 0;
277 nnzLj = 0;
278 nnzUj = 0;
279 last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
280 alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
281
282 /* Identify relaxed snodes */
283 relax_end = (int *) intMalloc(n);
284 relax_fsupc = (int *) intMalloc(n);
285 if ( options->SymmetricMode == YES )
286 ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
287 else
288 ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
289
290 ifill (perm_r, m, EMPTY);
291 ifill (marker, m * NO_MARKER, EMPTY);
292 supno[0] = -1;
293 xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
294 w_def = panel_size;
295
296 /* Mark the rows used by relaxed supernodes */
297 ifill (marker_relax, m, EMPTY);
298 i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
299 asub, marker_relax);
300 #if ( PRNTlevel >= 1)
301 printf("%d relaxed supernodes.\n", i);
302 #endif
303
304 /*
305 * Work on one "panel" at a time. A panel is one of the following:
306 * (a) a relaxed supernode at the bottom of the etree, or
307 * (b) panel_size contiguous columns, defined by the user
308 */
309 for (jcol = 0; jcol < min_mn; ) {
310
311 if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
312 kcol = relax_end[jcol]; /* end of the relaxed snode */
313 panel_histo[kcol-jcol+1]++;
314
315 /* Drop small rows in the previous supernode. */
316 if (jcol > 0 && jcol < last_drop) {
317 int first = xsup[supno[jcol - 1]];
318 int last = jcol - 1;
319 int quota;
320
321 /* Compute the quota */
322 if (drop_rule & DROP_PROWS)
323 quota = gamma * Astore->nnz / m * (m - first) / m
324 * (last - first + 1);
325 else if (drop_rule & DROP_COLUMN) {
326 int i;
327 quota = 0;
328 for (i = first; i <= last; i++)
329 quota += xa_end[i] - xa_begin[i];
330 quota = gamma * quota * (m - first) / m;
331 } else if (drop_rule & DROP_AREA)
332 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
333 - nnzLj;
334 else
335 quota = m * n;
336 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
337
338 /* Drop small rows */
339 stempv = (float *) tempv;
340 i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
341 &fill_tol, &Glu, stempv, swork2, 0);
342 /* Reset the parameters */
343 if (drop_rule & DROP_DYNAMIC) {
344 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
345 < nnzLj)
346 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
347 else
348 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
349 }
350 if (fill_tol < 0) iinfo -= (int)fill_tol;
351 #ifdef DEBUG
352 num_drop_L += i * (last - first + 1);
353 #endif
354 }
355
356 /* --------------------------------------
357 * Factorize the relaxed supernode(jcol:kcol)
358 * -------------------------------------- */
359 /* Determine the union of the row structure of the snode */
360 if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
361 marker, &Glu)) != 0 )
362 return;
363
364 nextu = xusub[jcol];
365 nextlu = xlusup[jcol];
366 jsupno = supno[jcol];
367 fsupc = xsup[jsupno];
368 new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
369 nzlumax = Glu.nzlumax;
370 while ( new_next > nzlumax ) {
371 if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
372 return;
373 }
374
375 for (icol = jcol; icol <= kcol; icol++) {
376 xusub[icol+1] = nextu;
377
378 amax[0] = 0.0;
379 /* Scatter into SPA dense[*] */
380 for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
381 register float tmp = c_abs1 (&a[k]);
382 if (tmp > amax[0]) amax[0] = tmp;
383 dense[asub[k]] = a[k];
384 }
385 nnzAj += xa_end[icol] - xa_begin[icol];
386 if (amax[0] == 0.0) {
387 amax[0] = fill_ini;
388 #if ( PRNTlevel >= 1)
389 printf("Column %d is entirely zero!\n", icol);
390 fflush(stdout);
391 #endif
392 }
393
394 /* Numeric update within the snode */
395 csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
396
397 if (usepr) pivrow = iperm_r[icol];
398 fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
399 if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
400 perm_r, iperm_c[icol], swap, iswap,
401 marker_relax, &pivrow,
402 amax[0] * fill_tol, milu, zero,
403 &Glu, stat)) ) {
404 iinfo++;
405 marker[pivrow] = kcol;
406 }
407
408 }
409
410 jcol = kcol + 1;
411
412 } else { /* Work on one panel of panel_size columns */
413
414 /* Adjust panel_size so that a panel won't overlap with the next
415 * relaxed snode.
416 */
417 panel_size = w_def;
418 for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
419 if ( relax_end[k] != EMPTY ) {
420 panel_size = k - jcol;
421 break;
422 }
423 if ( k == min_mn ) panel_size = min_mn - jcol;
424 panel_histo[panel_size]++;
425
426 /* symbolic factor on a panel of columns */
427 ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
428 dense, amax, panel_lsub, segrep, repfnz,
429 marker, parent, xplore, &Glu);
430
431 /* numeric sup-panel updates in topological order */
432 cpanel_bmod(m, panel_size, jcol, nseg1, dense,
433 tempv, segrep, repfnz, &Glu, stat);
434
435 /* Sparse LU within the panel, and below panel diagonal */
436 for (jj = jcol; jj < jcol + panel_size; jj++) {
437
438 k = (jj - jcol) * m; /* column index for w-wide arrays */
439
440 nseg = nseg1; /* Begin after all the panel segments */
441
442 nnzAj += xa_end[jj] - xa_begin[jj];
443
444 if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
445 &panel_lsub[k], segrep, &repfnz[k],
446 marker, parent, xplore, &Glu)))
447 return;
448
449 /* Numeric updates */
450 if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
451 tempv, &segrep[nseg1], &repfnz[k],
452 jcol, &Glu, stat)) != 0) return;
453
454 /* Make a fill-in position if the column is entirely zero */
455 if (xlsub[jj + 1] == xlsub[jj]) {
456 register int i, row;
457 int nextl;
458 int nzlmax = Glu.nzlmax;
459 int *lsub = Glu.lsub;
460 int *marker2 = marker + 2 * m;
461
462 /* Allocate memory */
463 nextl = xlsub[jj] + 1;
464 if (nextl >= nzlmax) {
465 int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
466 if (error) { *info = error; return; }
467 lsub = Glu.lsub;
468 }
469 xlsub[jj + 1]++;
470 assert(xlusup[jj]==xlusup[jj+1]);
471 xlusup[jj + 1]++;
472 Glu.lusup[xlusup[jj]] = zero;
473
474 /* Choose a row index (pivrow) for fill-in */
475 for (i = jj; i < n; i++)
476 if (marker_relax[swap[i]] <= jj) break;
477 row = swap[i];
478 marker2[row] = jj;
479 lsub[xlsub[jj]] = row;
480 #ifdef DEBUG
481 printf("Fill col %d.\n", jj);
482 fflush(stdout);
483 #endif
484 }
485
486 /* Computer the quota */
487 if (drop_rule & DROP_PROWS)
488 quota = gamma * Astore->nnz / m * jj / m;
489 else if (drop_rule & DROP_COLUMN)
490 quota = gamma * (xa_end[jj] - xa_begin[jj]) *
491 (jj + 1) / m;
492 else if (drop_rule & DROP_AREA)
493 quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
494 else
495 quota = m;
496
497 /* Copy the U-segments to ucol[*] and drop small entries */
498 if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
499 perm_r, &dense[k], drop_rule,
500 milu, amax[jj - jcol] * tol_U,
501 quota, &drop_sum, &nnzUj, &Glu,
502 swork2)) != 0)
503 return;
504
505 /* Reset the dropping threshold if required */
506 if (drop_rule & DROP_DYNAMIC) {
507 if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
508 tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
509 else
510 tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
511 }
512
513 if (drop_sum.r != 0.0 && drop_sum.i != 0.0)
514 {
515 omega = SUPERLU_MIN(2.0*(1.0-alpha)/c_abs1(&drop_sum), 1.0);
516 cs_mult(&drop_sum, &drop_sum, omega);
517 }
518 if (usepr) pivrow = iperm_r[jj];
519 fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
520 if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
521 iperm_c[jj], swap, iswap,
522 marker_relax, &pivrow,
523 amax[jj - jcol] * fill_tol, milu,
524 drop_sum, &Glu, stat)) ) {
525 iinfo++;
526 marker[m + pivrow] = jj;
527 marker[2 * m + pivrow] = jj;
528 }
529
530 /* Reset repfnz[] for this column */
531 resetrep_col (nseg, segrep, &repfnz[k]);
532
533 /* Start a new supernode, drop the previous one */
534 if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
535 int first = xsup[supno[jj - 1]];
536 int last = jj - 1;
537 int quota;
538
539 /* Compute the quota */
540 if (drop_rule & DROP_PROWS)
541 quota = gamma * Astore->nnz / m * (m - first) / m
542 * (last - first + 1);
543 else if (drop_rule & DROP_COLUMN) {
544 int i;
545 quota = 0;
546 for (i = first; i <= last; i++)
547 quota += xa_end[i] - xa_begin[i];
548 quota = gamma * quota * (m - first) / m;
549 } else if (drop_rule & DROP_AREA)
550 quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
551 / m) - nnzLj;
552 else
553 quota = m * n;
554 fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
555 (double)min_mn);
556
557 /* Drop small rows */
558 stempv = (float *) tempv;
559 i = ilu_cdrop_row(options, first, last, tol_L, quota,
560 &nnzLj, &fill_tol, &Glu, stempv, swork2,
561 1);
562
563 /* Reset the parameters */
564 if (drop_rule & DROP_DYNAMIC) {
565 if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
566 < nnzLj)
567 tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
568 else
569 tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
570 }
571 if (fill_tol < 0) iinfo -= (int)fill_tol;
572 #ifdef DEBUG
573 num_drop_L += i * (last - first + 1);
574 #endif
575 } /* if start a new supernode */
576
577 } /* for */
578
579 jcol += panel_size; /* Move to the next panel */
580
581 } /* else */
582
583 } /* for */
584
585 *info = iinfo;
586
587 if ( m > n ) {
588 k = 0;
589 for (i = 0; i < m; ++i)
590 if ( perm_r[i] == EMPTY ) {
591 perm_r[i] = n + k;
592 ++k;
593 }
594 }
595
596 ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
597 fixupL(min_mn, perm_r, &Glu);
598
599 cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */
600
601 if ( fact == SamePattern_SameRowPerm ) {
602 /* L and U structures may have changed due to possibly different
603 pivoting, even though the storage is available.
604 There could also be memory expansions, so the array locations
605 may have changed, */
606 ((SCformat *)L->Store)->nnz = nnzL;
607 ((SCformat *)L->Store)->nsuper = Glu.supno[n];
608 ((SCformat *)L->Store)->nzval = Glu.lusup;
609 ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
610 ((SCformat *)L->Store)->rowind = Glu.lsub;
611 ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
612 ((NCformat *)U->Store)->nnz = nnzU;
613 ((NCformat *)U->Store)->nzval = Glu.ucol;
614 ((NCformat *)U->Store)->rowind = Glu.usub;
615 ((NCformat *)U->Store)->colptr = Glu.xusub;
616 } else {
617 cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
618 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
619 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
620 cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
621 Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
622 }
623
624 ops[FACT] += ops[TRSV] + ops[GEMV];
625 stat->expansions = --(Glu.num_expansions);
626
627 if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
628 SUPERLU_FREE (iperm_c);
629 SUPERLU_FREE (relax_end);
630 SUPERLU_FREE (swap);
631 SUPERLU_FREE (iswap);
632 SUPERLU_FREE (relax_fsupc);
633 SUPERLU_FREE (amax);
634 if ( swork2 ) SUPERLU_FREE (swork2);
635
636 }
637