1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5 
6 All rights reserved.
7 
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11 
12 /*! @file sgsitrf.c
13  * \brief Computes an ILU factorization of a general sparse matrix
14  *
15  * <pre>
16  * -- SuperLU routine (version 4.1) --
17  * Lawrence Berkeley National Laboratory.
18  * June 30, 2009
19  *
20  * </pre>
21  */
22 
23 #include "slu_sdefs.h"
24 
25 #ifdef DEBUG
26 int num_drop_L;
27 #endif
28 
29 /*! \brief
30  *
31  * <pre>
32  * Purpose
33  * =======
34  *
35  * SGSITRF computes an ILU factorization of a general sparse m-by-n
36  * matrix A using partial pivoting with row interchanges.
37  * The factorization has the form
38  *     Pr * A = L * U
39  * where Pr is a row permutation matrix, L is lower triangular with unit
40  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
41  * triangular (upper trapezoidal if A->nrow < A->ncol).
42  *
43  * See supermatrix.h for the definition of 'SuperMatrix' structure.
44  *
45  * Arguments
46  * =========
47  *
48  * options (input) superlu_options_t*
49  *	   The structure defines the input parameters to control
50  *	   how the ILU decomposition will be performed.
51  *
52  * A	    (input) SuperMatrix*
53  *	    Original matrix A, permuted by columns, of dimension
54  *	    (A->nrow, A->ncol). The type of A can be:
55  *	    Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
56  *
57  * relax    (input) int
58  *	    To control degree of relaxing supernodes. If the number
59  *	    of nodes (columns) in a subtree of the elimination tree is less
60  *	    than relax, this subtree is considered as one supernode,
61  *	    regardless of the row structures of those columns.
62  *
63  * panel_size (input) int
64  *	    A panel consists of at most panel_size consecutive columns.
65  *
66  * etree    (input) int*, dimension (A->ncol)
67  *	    Elimination tree of A'*A.
68  *	    Note: etree is a vector of parent pointers for a forest whose
69  *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
70  *	    On input, the columns of A should be permuted so that the
71  *	    etree is in a certain postorder.
72  *
73  * work     (input/output) void*, size (lwork) (in bytes)
74  *	    User-supplied work space and space for the output data structures.
75  *	    Not referenced if lwork = 0;
76  *
77  * lwork   (input) int
78  *	   Specifies the size of work array in bytes.
79  *	   = 0:  allocate space internally by system malloc;
80  *	   > 0:  use user-supplied work array of length lwork in bytes,
81  *		 returns error if space runs out.
82  *	   = -1: the routine guesses the amount of space needed without
83  *		 performing the factorization, and returns it in
84  *		 *info; no other side effects.
85  *
86  * perm_c   (input) int*, dimension (A->ncol)
87  *	    Column permutation vector, which defines the
88  *	    permutation matrix Pc; perm_c[i] = j means column i of A is
89  *	    in position j in A*Pc.
90  *	    When searching for diagonal, perm_c[*] is applied to the
91  *	    row subscripts of A, so that diagonal threshold pivoting
92  *	    can find the diagonal of A, rather than that of A*Pc.
93  *
94  * perm_r   (input/output) int*, dimension (A->nrow)
95  *	    Row permutation vector which defines the permutation matrix Pr,
96  *	    perm_r[i] = j means row i of A is in position j in Pr*A.
97  *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
98  *	       will try to use the input perm_r, unless a certain threshold
99  *	       criterion is violated. In that case, perm_r is overwritten by
100  *	       a new permutation determined by partial pivoting or diagonal
101  *	       threshold pivoting.
102  *	    Otherwise, perm_r is output argument;
103  *
104  * L	    (output) SuperMatrix*
105  *	    The factor L from the factorization Pr*A=L*U; use compressed row
106  *	    subscripts storage for supernodes, i.e., L has type:
107  *	    Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
108  *
109  * U	    (output) SuperMatrix*
110  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
111  *	    storage scheme, i.e., U has types: Stype = SLU_NC,
112  *	    Dtype = SLU_S, Mtype = SLU_TRU.
113  *
114  * Glu      (input/output) GlobalLU_t *
115  *          If options->Fact == SamePattern_SameRowPerm, it is an input;
116  *              The matrix A will be factorized assuming that a
117  *              factorization of a matrix with the same sparsity pattern
118  *              and similar numerical values was performed prior to this one.
119  *              Therefore, this factorization will reuse both row and column
120  *		scaling factors R and C, both row and column permutation
121  *		vectors perm_r and perm_c, and the L & U data structures
122  *		set up from the previous factorization.
123  *          Otherwise, it is an output.
124  *
125  * stat     (output) SuperLUStat_t*
126  *	    Record the statistics on runtime and floating-point operation count.
127  *	    See slu_util.h for the definition of 'SuperLUStat_t'.
128  *
129  * info     (output) int*
130  *	    = 0: successful exit
131  *	    < 0: if info = -i, the i-th argument had an illegal value
132  *	    > 0: if info = i, and i is
133  *	       <= A->ncol: number of zero pivots. They are replaced by small
134  *		  entries according to options->ILU_FillTol.
135  *	       > A->ncol: number of bytes allocated when memory allocation
136  *		  failure occurred, plus A->ncol. If lwork = -1, it is
137  *		  the estimated amount of space needed, plus A->ncol.
138  *
139  * ======================================================================
140  *
141  * Local Working Arrays:
142  * ======================
143  *   m = number of rows in the matrix
144  *   n = number of columns in the matrix
145  *
146  *   marker[0:3*m-1]: marker[i] = j means that node i has been
147  *	reached when working on column j.
148  *	Storage: relative to original row subscripts
149  *	NOTE: There are 4 of them:
150  *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
151  *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
152  *	      marker_relax(has its own space) is used for relaxed supernodes.
153  *
154  *   parent[0:m-1]: parent vector used during dfs
155  *	Storage: relative to new row subscripts
156  *
157  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
158  *	unexplored neighbor of i in lsub[*]
159  *
160  *   segrep[0:nseg-1]: contains the list of supernodal representatives
161  *	in topological order of the dfs. A supernode representative is the
162  *	last column of a supernode.
163  *	The maximum size of segrep[] is n.
164  *
165  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
166  *	supernodal representative r, repfnz[r] is the location of the first
167  *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
168  *	indicates the supernode r has been explored.
169  *	NOTE: There are W of them, each used for one column of a panel.
170  *
171  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
172  *	the panel diagonal. These are filled in during dpanel_dfs(), and are
173  *	used later in the inner LU factorization within the panel.
174  *	panel_lsub[]/dense[] pair forms the SPA data structure.
175  *	NOTE: There are W of them.
176  *
177  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
178  *		   NOTE: there are W of them.
179  *
180  *   tempv[0:*]: real temporary used for dense numeric kernels;
181  *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
182  *	It is also used by the dropping routine ilu_ddrop_row().
183  * </pre>
184  */
185 
186 void
sgsitrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)187 sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
188 	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
189 	SuperMatrix *L, SuperMatrix *U,
190     	GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
191 	SuperLUStat_t *stat, int *info)
192 {
193     /* Local working arrays */
194     NCPformat *Astore;
195     int       *iperm_r = NULL; /* inverse of perm_r; used when
196 				  options->Fact == SamePattern_SameRowPerm */
197     int       *iperm_c; /* inverse of perm_c */
198     int       *swap, *iswap; /* swap is used to store the row permutation
199 				during the factorization. Initially, it is set
200 				to iperm_c (row indeces of Pc*A*Pc').
201 				iswap is the inverse of swap. After the
202 				factorization, it is equal to perm_r. */
203     int       *iwork;
204     float   *swork;
205     int       *segrep, *repfnz, *parent, *xplore;
206     int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
207     int       *marker, *marker_relax;
208     float    *dense, *tempv;
209     int       *relax_end, *relax_fsupc;
210     float    *a;
211     int       *asub;
212     int       *xa_begin, *xa_end;
213     int       *xsup, *supno;
214     int       *xlsub, *xlusup, *xusub;
215     int       nzlumax;
216     float    *amax;
217     float    drop_sum;
218     float alpha, omega;  /* used in MILU, mimicing DRIC */
219     float    *swork2;	   /* used by the second dropping rule */
220 
221     /* Local scalars */
222     fact_t    fact = options->Fact;
223     double    diag_pivot_thresh = options->DiagPivotThresh;
224     double    drop_tol = options->ILU_DropTol; /* tau */
225     double    fill_ini = options->ILU_FillTol; /* tau^hat */
226     double    gamma = options->ILU_FillFactor;
227     int       drop_rule = options->ILU_DropRule;
228     milu_t    milu = options->ILU_MILU;
229     double    fill_tol;
230     int       pivrow;	/* pivotal row number in the original matrix A */
231     int       nseg1;	/* no of segments in U-column above panel row jcol */
232     int       nseg;	/* no of segments in each U-column */
233     register int jcol;
234     register int kcol;	/* end column of a relaxed snode */
235     register int icol;
236     register int i, k, jj, new_next, iinfo;
237     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
238     int       w_def;	/* upper bound on panel width */
239     int       usepr, iperm_r_allocated = 0;
240     int       nnzL, nnzU;
241     int       *panel_histo = stat->panel_histo;
242     flops_t   *ops = stat->ops;
243 
244     int       last_drop;/* the last column which the dropping rules applied */
245     int       quota;
246     int       nnzAj;	/* number of nonzeros in A(:,1:j) */
247     int       nnzLj, nnzUj;
248     double    tol_L = drop_tol, tol_U = drop_tol;
249     float zero = 0.0;
250     float one = 1.0;
251 
252     /* Executable */
253     iinfo    = 0;
254     m	     = A->nrow;
255     n	     = A->ncol;
256     min_mn   = SUPERLU_MIN(m, n);
257     Astore   = A->Store;
258     a	     = Astore->nzval;
259     asub     = Astore->rowind;
260     xa_begin = Astore->colbeg;
261     xa_end   = Astore->colend;
262 
263     /* Allocate storage common to the factor routines */
264     *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
265 		       gamma, L, U, Glu, &iwork, &swork);
266     if ( *info ) return;
267 
268     xsup    = Glu->xsup;
269     supno   = Glu->supno;
270     xlsub   = Glu->xlsub;
271     xlusup  = Glu->xlusup;
272     xusub   = Glu->xusub;
273 
274     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
275 	     &repfnz, &panel_lsub, &marker_relax, &marker);
276     sSetRWork(m, panel_size, swork, &dense, &tempv);
277 
278     usepr = (fact == SamePattern_SameRowPerm);
279     if ( usepr ) {
280 	/* Compute the inverse of perm_r */
281 	iperm_r = (int *) intMalloc(m);
282 	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
283 	iperm_r_allocated = 1;
284     }
285 
286     iperm_c = (int *) intMalloc(n);
287     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
288     swap = (int *)intMalloc(n);
289     for (k = 0; k < n; k++) swap[k] = iperm_c[k];
290     iswap = (int *)intMalloc(n);
291     for (k = 0; k < n; k++) iswap[k] = perm_c[k];
292     amax = (float *) SUPERLU_MALLOC(panel_size * sizeof(float));
293     if (drop_rule & DROP_SECONDARY)
294 	swork2 = SUPERLU_MALLOC(n * sizeof(float));
295     else
296 	swork2 = NULL;
297 
298     nnzAj = 0;
299     nnzLj = 0;
300     nnzUj = 0;
301     last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
302     alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
303 
304     /* Identify relaxed snodes */
305     relax_end = (int *) intMalloc(n);
306     relax_fsupc = (int *) intMalloc(n);
307     if ( options->SymmetricMode == YES )
308 	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
309     else
310 	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
311 
312     ifill (perm_r, m, EMPTY);
313     ifill (marker, m * NO_MARKER, EMPTY);
314     supno[0] = -1;
315     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
316     w_def    = panel_size;
317 
318     /* Mark the rows used by relaxed supernodes */
319     ifill (marker_relax, m, EMPTY);
320     i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
321 	         asub, marker_relax);
322 #if ( PRNTlevel >= 1)
323     printf("%d relaxed supernodes.\n", i);
324 #endif
325 
326     /*
327      * Work on one "panel" at a time. A panel is one of the following:
328      *	   (a) a relaxed supernode at the bottom of the etree, or
329      *	   (b) panel_size contiguous columns, defined by the user
330      */
331     for (jcol = 0; jcol < min_mn; ) {
332 
333 	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
334 	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
335 	    panel_histo[kcol-jcol+1]++;
336 
337 	    /* Drop small rows in the previous supernode. */
338 	    if (jcol > 0 && jcol < last_drop) {
339 		int first = xsup[supno[jcol - 1]];
340 		int last = jcol - 1;
341 		int quota;
342 
343 		/* Compute the quota */
344 		if (drop_rule & DROP_PROWS)
345 		    quota = gamma * Astore->nnz / m * (m - first) / m
346 			    * (last - first + 1);
347 		else if (drop_rule & DROP_COLUMN) {
348 		    int i;
349 		    quota = 0;
350 		    for (i = first; i <= last; i++)
351 			quota += xa_end[i] - xa_begin[i];
352 		    quota = gamma * quota * (m - first) / m;
353 		} else if (drop_rule & DROP_AREA)
354 		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
355 			    - nnzLj;
356 		else
357 		    quota = m * n;
358 		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
359 
360 		/* Drop small rows */
361 		i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
362 				  &fill_tol, Glu, tempv, swork2, 0);
363 		/* Reset the parameters */
364 		if (drop_rule & DROP_DYNAMIC) {
365 		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
366 			     < nnzLj)
367 			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
368 		    else
369 			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
370 		}
371 		if (fill_tol < 0) iinfo -= (int)fill_tol;
372 #ifdef DEBUG
373 		num_drop_L += i * (last - first + 1);
374 #endif
375 	    }
376 
377 	    /* --------------------------------------
378 	     * Factorize the relaxed supernode(jcol:kcol)
379 	     * -------------------------------------- */
380 	    /* Determine the union of the row structure of the snode */
381 	    if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
382 					 marker, Glu)) != 0 )
383 		return;
384 
385 	    nextu    = xusub[jcol];
386 	    nextlu   = xlusup[jcol];
387 	    jsupno   = supno[jcol];
388 	    fsupc    = xsup[jsupno];
389 	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
390 	    nzlumax = Glu->nzlumax;
391 	    while ( new_next > nzlumax ) {
392 		if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
393 		    return;
394 	    }
395 
396 	    for (icol = jcol; icol <= kcol; icol++) {
397 		xusub[icol+1] = nextu;
398 
399 		amax[0] = 0.0;
400 		/* Scatter into SPA dense[*] */
401 		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
402 		    register float tmp = fabs(a[k]);
403 		    if (tmp > amax[0]) amax[0] = tmp;
404 		    dense[asub[k]] = a[k];
405 		}
406 		nnzAj += xa_end[icol] - xa_begin[icol];
407 		if (amax[0] == 0.0) {
408 		    amax[0] = fill_ini;
409 #if ( PRNTlevel >= 1)
410 		    printf("Column %d is entirely zero!\n", icol);
411 		    fflush(stdout);
412 #endif
413 		}
414 
415 		/* Numeric update within the snode */
416 		ssnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
417 
418 		if (usepr) pivrow = iperm_r[icol];
419 		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
420 		if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
421 					  perm_r, iperm_c[icol], swap, iswap,
422 					  marker_relax, &pivrow,
423                                           amax[0] * fill_tol, milu, zero,
424                                           Glu, stat)) ) {
425 		    iinfo++;
426 		    marker[pivrow] = kcol;
427 		}
428 
429 	    }
430 
431 	    jcol = kcol + 1;
432 
433 	} else { /* Work on one panel of panel_size columns */
434 
435 	    /* Adjust panel_size so that a panel won't overlap with the next
436 	     * relaxed snode.
437 	     */
438 	    panel_size = w_def;
439 	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
440 		if ( relax_end[k] != EMPTY ) {
441 		    panel_size = k - jcol;
442 		    break;
443 		}
444 	    if ( k == min_mn ) panel_size = min_mn - jcol;
445 	    panel_histo[panel_size]++;
446 
447 	    /* symbolic factor on a panel of columns */
448 	    ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
449                           dense, amax, panel_lsub, segrep, repfnz,
450                           marker, parent, xplore, Glu);
451 
452 	    /* numeric sup-panel updates in topological order */
453 	    spanel_bmod(m, panel_size, jcol, nseg1, dense,
454 			tempv, segrep, repfnz, Glu, stat);
455 
456 	    /* Sparse LU within the panel, and below panel diagonal */
457 	    for (jj = jcol; jj < jcol + panel_size; jj++) {
458 
459 		k = (jj - jcol) * m; /* column index for w-wide arrays */
460 
461 		nseg = nseg1;	/* Begin after all the panel segments */
462 
463 		nnzAj += xa_end[jj] - xa_begin[jj];
464 
465 		if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
466 					     &panel_lsub[k], segrep, &repfnz[k],
467 					     marker, parent, xplore, Glu)))
468 		    return;
469 
470 		/* Numeric updates */
471 		if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
472 					  tempv, &segrep[nseg1], &repfnz[k],
473 					  jcol, Glu, stat)) != 0) return;
474 
475 		/* Make a fill-in position if the column is entirely zero */
476 		if (xlsub[jj + 1] == xlsub[jj]) {
477 		    register int i, row;
478 		    int nextl;
479 		    int nzlmax = Glu->nzlmax;
480 		    int *lsub = Glu->lsub;
481 		    int *marker2 = marker + 2 * m;
482 
483 		    /* Allocate memory */
484 		    nextl = xlsub[jj] + 1;
485 		    if (nextl >= nzlmax) {
486 			int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, Glu);
487 			if (error) { *info = error; return; }
488 			lsub = Glu->lsub;
489 		    }
490 		    xlsub[jj + 1]++;
491 		    assert(xlusup[jj]==xlusup[jj+1]);
492 		    xlusup[jj + 1]++;
493 		    ((float *) Glu->lusup)[xlusup[jj]] = zero;
494 
495 		    /* Choose a row index (pivrow) for fill-in */
496 		    for (i = jj; i < n; i++)
497 			if (marker_relax[swap[i]] <= jj) break;
498 		    row = swap[i];
499 		    marker2[row] = jj;
500 		    lsub[xlsub[jj]] = row;
501 #ifdef DEBUG
502 		    printf("Fill col %d.\n", jj);
503 		    fflush(stdout);
504 #endif
505 		}
506 
507 		/* Computer the quota */
508 		if (drop_rule & DROP_PROWS)
509 		    quota = gamma * Astore->nnz / m * jj / m;
510 		else if (drop_rule & DROP_COLUMN)
511 		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
512 			    (jj + 1) / m;
513 		else if (drop_rule & DROP_AREA)
514 		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
515 		else
516 		    quota = m;
517 
518 		/* Copy the U-segments to ucol[*] and drop small entries */
519 		if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
520 					       perm_r, &dense[k], drop_rule,
521 					       milu, amax[jj - jcol] * tol_U,
522 					       quota, &drop_sum, &nnzUj, Glu,
523 					       swork2)) != 0)
524 		    return;
525 
526 		/* Reset the dropping threshold if required */
527 		if (drop_rule & DROP_DYNAMIC) {
528 		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
529 			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
530 		    else
531 			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
532 		}
533 
534 		if (drop_sum != zero)
535 		{
536 		    if (drop_sum > zero)
537 			omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
538 				* amax[jj - jcol] / drop_sum, one);
539 		    else
540 			omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
541 				* amax[jj - jcol] / drop_sum, -one);
542 		    drop_sum *= omega;
543                 }
544 		if (usepr) pivrow = iperm_r[jj];
545 		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
546 		if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
547 					  iperm_c[jj], swap, iswap,
548 					  marker_relax, &pivrow,
549 					  amax[jj - jcol] * fill_tol, milu,
550 					  drop_sum, Glu, stat)) ) {
551 		    iinfo++;
552 		    marker[m + pivrow] = jj;
553 		    marker[2 * m + pivrow] = jj;
554 		}
555 
556 		/* Reset repfnz[] for this column */
557 		resetrep_col (nseg, segrep, &repfnz[k]);
558 
559 		/* Start a new supernode, drop the previous one */
560 		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
561 		    int first = xsup[supno[jj - 1]];
562 		    int last = jj - 1;
563 		    int quota;
564 
565 		    /* Compute the quota */
566 		    if (drop_rule & DROP_PROWS)
567 			quota = gamma * Astore->nnz / m * (m - first) / m
568 				* (last - first + 1);
569 		    else if (drop_rule & DROP_COLUMN) {
570 			int i;
571 			quota = 0;
572 			for (i = first; i <= last; i++)
573 			    quota += xa_end[i] - xa_begin[i];
574 			quota = gamma * quota * (m - first) / m;
575 		    } else if (drop_rule & DROP_AREA)
576 			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
577 				/ m) - nnzLj;
578 		    else
579 			quota = m * n;
580 		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
581 			    (double)min_mn);
582 
583 		    /* Drop small rows */
584 		    i = ilu_sdrop_row(options, first, last, tol_L, quota,
585 				      &nnzLj, &fill_tol, Glu, tempv, swork2,
586 				      1);
587 
588 		    /* Reset the parameters */
589 		    if (drop_rule & DROP_DYNAMIC) {
590 			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
591 				< nnzLj)
592 			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
593 			else
594 			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
595 		    }
596 		    if (fill_tol < 0) iinfo -= (int)fill_tol;
597 #ifdef DEBUG
598 		    num_drop_L += i * (last - first + 1);
599 #endif
600 		} /* if start a new supernode */
601 
602 	    } /* for */
603 
604 	    jcol += panel_size; /* Move to the next panel */
605 
606 	} /* else */
607 
608     } /* for */
609 
610     *info = iinfo;
611 
612     if ( m > n ) {
613 	k = 0;
614 	for (i = 0; i < m; ++i)
615 	    if ( perm_r[i] == EMPTY ) {
616 		perm_r[i] = n + k;
617 		++k;
618 	    }
619     }
620 
621     ilu_countnz(min_mn, &nnzL, &nnzU, Glu);
622     fixupL(min_mn, perm_r, Glu);
623 
624     sLUWorkFree(iwork, swork, Glu); /* Free work space and compress storage */
625 
626     if ( fact == SamePattern_SameRowPerm ) {
627 	/* L and U structures may have changed due to possibly different
628 	   pivoting, even though the storage is available.
629 	   There could also be memory expansions, so the array locations
630 	   may have changed, */
631 	((SCformat *)L->Store)->nnz = nnzL;
632 	((SCformat *)L->Store)->nsuper = Glu->supno[n];
633 	((SCformat *)L->Store)->nzval = (float *) Glu->lusup;
634 	((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
635 	((SCformat *)L->Store)->rowind = Glu->lsub;
636 	((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
637 	((NCformat *)U->Store)->nnz = nnzU;
638 	((NCformat *)U->Store)->nzval = (float *) Glu->ucol;
639 	((NCformat *)U->Store)->rowind = Glu->usub;
640 	((NCformat *)U->Store)->colptr = Glu->xusub;
641     } else {
642 	sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
643               (float *) Glu->lusup, Glu->xlusup,
644               Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
645 	      SLU_SC, SLU_S, SLU_TRLU);
646 	sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
647 	      (float *) Glu->ucol, Glu->usub, Glu->xusub,
648 	      SLU_NC, SLU_S, SLU_TRU);
649     }
650 
651     ops[FACT] += ops[TRSV] + ops[GEMV];
652     stat->expansions = --(Glu->num_expansions);
653 
654     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
655     SUPERLU_FREE (iperm_c);
656     SUPERLU_FREE (relax_end);
657     SUPERLU_FREE (swap);
658     SUPERLU_FREE (iswap);
659     SUPERLU_FREE (relax_fsupc);
660     SUPERLU_FREE (amax);
661     if ( swork2 ) SUPERLU_FREE (swork2);
662 
663 }
664