1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5 
6 All rights reserved.
7 
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11 
12 /*! @file sgsitrf.c
13  * \brief Computes an ILU factorization of a general sparse matrix
14  *
15  * <pre>
16  * -- SuperLU routine (version 4.1) --
17  * Lawrence Berkeley National Laboratory.
18  * June 30, 2009
19  *
20  * </pre>
21  */
22 
23 #include "slu_sdefs.h"
24 #include "supermatrix.h"
25 #include "slu_util.h"
26 
27 #ifdef DEBUG
28 int num_drop_L;
29 #endif
30 
31 /*! \brief
32  *
33  * <pre>
34  * Purpose
35  * =======
36  *
37  * SGSITRF computes an ILU factorization of a general sparse m-by-n
38  * matrix A using partial pivoting with row interchanges.
39  * The factorization has the form
40  *     Pr * A = L * U
41  * where Pr is a row permutation matrix, L is lower triangular with unit
42  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
43  * triangular (upper trapezoidal if A->nrow < A->ncol).
44  *
45  * See supermatrix.h for the definition of 'SuperMatrix' structure.
46  *
47  * Arguments
48  * =========
49  *
50  * options (input) superlu_options_t*
51  *	   The structure defines the input parameters to control
52  *	   how the ILU decomposition will be performed.
53  *
54  * A	    (input) SuperMatrix*
55  *	    Original matrix A, permuted by columns, of dimension
56  *	    (A->nrow, A->ncol). The type of A can be:
57  *	    Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
58  *
59  * relax    (input) int
60  *	    To control degree of relaxing supernodes. If the number
61  *	    of nodes (columns) in a subtree of the elimination tree is less
62  *	    than relax, this subtree is considered as one supernode,
63  *	    regardless of the row structures of those columns.
64  *
65  * panel_size (input) int
66  *	    A panel consists of at most panel_size consecutive columns.
67  *
68  * etree    (input) int*, dimension (A->ncol)
69  *	    Elimination tree of A'*A.
70  *	    Note: etree is a vector of parent pointers for a forest whose
71  *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
72  *	    On input, the columns of A should be permuted so that the
73  *	    etree is in a certain postorder.
74  *
75  * work     (input/output) void*, size (lwork) (in bytes)
76  *	    User-supplied work space and space for the output data structures.
77  *	    Not referenced if lwork = 0;
78  *
79  * lwork   (input) int
80  *	   Specifies the size of work array in bytes.
81  *	   = 0:  allocate space internally by system malloc;
82  *	   > 0:  use user-supplied work array of length lwork in bytes,
83  *		 returns error if space runs out.
84  *	   = -1: the routine guesses the amount of space needed without
85  *		 performing the factorization, and returns it in
86  *		 *info; no other side effects.
87  *
88  * perm_c   (input) int*, dimension (A->ncol)
89  *	    Column permutation vector, which defines the
90  *	    permutation matrix Pc; perm_c[i] = j means column i of A is
91  *	    in position j in A*Pc.
92  *	    When searching for diagonal, perm_c[*] is applied to the
93  *	    row subscripts of A, so that diagonal threshold pivoting
94  *	    can find the diagonal of A, rather than that of A*Pc.
95  *
96  * perm_r   (input/output) int*, dimension (A->nrow)
97  *	    Row permutation vector which defines the permutation matrix Pr,
98  *	    perm_r[i] = j means row i of A is in position j in Pr*A.
99  *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
100  *	       will try to use the input perm_r, unless a certain threshold
101  *	       criterion is violated. In that case, perm_r is overwritten by
102  *	       a new permutation determined by partial pivoting or diagonal
103  *	       threshold pivoting.
104  *	    Otherwise, perm_r is output argument;
105  *
106  * L	    (output) SuperMatrix*
107  *	    The factor L from the factorization Pr*A=L*U; use compressed row
108  *	    subscripts storage for supernodes, i.e., L has type:
109  *	    Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
110  *
111  * U	    (output) SuperMatrix*
112  *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
113  *	    storage scheme, i.e., U has types: Stype = SLU_NC,
114  *	    Dtype = SLU_S, Mtype = SLU_TRU.
115  *
116  * Glu      (input/output) GlobalLU_t *
117  *          If options->Fact == SamePattern_SameRowPerm, it is an input;
118  *              The matrix A will be factorized assuming that a
119  *              factorization of a matrix with the same sparsity pattern
120  *              and similar numerical values was performed prior to this one.
121  *              Therefore, this factorization will reuse both row and column
122  *		scaling factors R and C, both row and column permutation
123  *		vectors perm_r and perm_c, and the L & U data structures
124  *		set up from the previous factorization.
125  *          Otherwise, it is an output.
126  *
127  * stat     (output) SuperLUStat_t*
128  *	    Record the statistics on runtime and floating-point operation count.
129  *	    See slu_util.h for the definition of 'SuperLUStat_t'.
130  *
131  * info     (output) int*
132  *	    = 0: successful exit
133  *	    < 0: if info = -i, the i-th argument had an illegal value
134  *	    > 0: if info = i, and i is
135  *	       <= A->ncol: number of zero pivots. They are replaced by small
136  *		  entries according to options->ILU_FillTol.
137  *	       > A->ncol: number of bytes allocated when memory allocation
138  *		  failure occurred, plus A->ncol. If lwork = -1, it is
139  *		  the estimated amount of space needed, plus A->ncol.
140  *
141  * ======================================================================
142  *
143  * Local Working Arrays:
144  * ======================
145  *   m = number of rows in the matrix
146  *   n = number of columns in the matrix
147  *
148  *   marker[0:3*m-1]: marker[i] = j means that node i has been
149  *	reached when working on column j.
150  *	Storage: relative to original row subscripts
151  *	NOTE: There are 4 of them:
152  *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
153  *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
154  *	      marker_relax(has its own space) is used for relaxed supernodes.
155  *
156  *   parent[0:m-1]: parent vector used during dfs
157  *	Storage: relative to new row subscripts
158  *
159  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
160  *	unexplored neighbor of i in lsub[*]
161  *
162  *   segrep[0:nseg-1]: contains the list of supernodal representatives
163  *	in topological order of the dfs. A supernode representative is the
164  *	last column of a supernode.
165  *	The maximum size of segrep[] is n.
166  *
167  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
168  *	supernodal representative r, repfnz[r] is the location of the first
169  *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
170  *	indicates the supernode r has been explored.
171  *	NOTE: There are W of them, each used for one column of a panel.
172  *
173  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
174  *	the panel diagonal. These are filled in during dpanel_dfs(), and are
175  *	used later in the inner LU factorization within the panel.
176  *	panel_lsub[]/dense[] pair forms the SPA data structure.
177  *	NOTE: There are W of them.
178  *
179  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
180  *		   NOTE: there are W of them.
181  *
182  *   tempv[0:*]: real temporary used for dense numeric kernels;
183  *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
184  *	It is also used by the dropping routine ilu_ddrop_row().
185  * </pre>
186  */
187 
188 void
sgsitrf(superlu_options_t * options,SuperMatrix * A,int relax,int panel_size,int * etree,void * work,int lwork,int * perm_c,int * perm_r,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,SuperLUStat_t * stat,int * info)189 sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
190 	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
191 	SuperMatrix *L, SuperMatrix *U,
192     	GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
193 	SuperLUStat_t *stat, int *info)
194 {
195     /* Local working arrays */
196     NCPformat *Astore;
197     int       *iperm_r = NULL; /* inverse of perm_r; used when
198 				  options->Fact == SamePattern_SameRowPerm */
199     int       *iperm_c; /* inverse of perm_c */
200     int       *swap, *iswap; /* swap is used to store the row permutation
201 				during the factorization. Initially, it is set
202 				to iperm_c (row indeces of Pc*A*Pc').
203 				iswap is the inverse of swap. After the
204 				factorization, it is equal to perm_r. */
205     int       *iwork;
206     float   *swork;
207     int       *segrep, *repfnz, *parent, *xplore;
208     int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
209     int       *marker, *marker_relax;
210     float    *dense, *tempv;
211     int       *relax_end, *relax_fsupc;
212     float    *a;
213     int       *asub;
214     int       *xa_begin, *xa_end;
215     int       *xsup, *supno;
216     int       *xlsub, *xlusup, *xusub;
217     int       nzlumax;
218     float    *amax;
219     float    drop_sum;
220     float alpha, omega;  /* used in MILU, mimicing DRIC */
221     float    *swork2;	   /* used by the second dropping rule */
222 
223     /* Local scalars */
224     fact_t    fact = options->Fact;
225     double    diag_pivot_thresh = options->DiagPivotThresh;
226     double    drop_tol = options->ILU_DropTol; /* tau */
227     double    fill_ini = options->ILU_FillTol; /* tau^hat */
228     double    gamma = options->ILU_FillFactor;
229     int       drop_rule = options->ILU_DropRule;
230     milu_t    milu = options->ILU_MILU;
231     double    fill_tol;
232     int       pivrow;	/* pivotal row number in the original matrix A */
233     int       nseg1;	/* no of segments in U-column above panel row jcol */
234     int       nseg;	/* no of segments in each U-column */
235     register int jcol;
236     register int kcol;	/* end column of a relaxed snode */
237     register int icol;
238     register int i, k, jj, new_next, iinfo;
239     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
240     int       w_def;	/* upper bound on panel width */
241     int       usepr, iperm_r_allocated = 0;
242     int       nnzL, nnzU;
243     int       *panel_histo = stat->panel_histo;
244     flops_t   *ops = stat->ops;
245 
246     int       last_drop;/* the last column which the dropping rules applied */
247     int       quota;
248     int       nnzAj;	/* number of nonzeros in A(:,1:j) */
249     int       nnzLj, nnzUj;
250     double    tol_L = drop_tol, tol_U = drop_tol;
251     float zero = 0.0;
252     float one = 1.0;
253 
254     /* Executable */
255     iinfo    = 0;
256     m	     = A->nrow;
257     n	     = A->ncol;
258     min_mn   = SUPERLU_MIN(m, n);
259     Astore   = A->Store;
260     a	     = Astore->nzval;
261     asub     = Astore->rowind;
262     xa_begin = Astore->colbeg;
263     xa_end   = Astore->colend;
264 
265     /* Allocate storage common to the factor routines */
266     *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
267 		       gamma, L, U, Glu, &iwork, &swork);
268     if ( *info ) return;
269 
270     xsup    = Glu->xsup;
271     supno   = Glu->supno;
272     xlsub   = Glu->xlsub;
273     xlusup  = Glu->xlusup;
274     xusub   = Glu->xusub;
275 
276     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
277 	     &repfnz, &panel_lsub, &marker_relax, &marker);
278     sSetRWork(m, panel_size, swork, &dense, &tempv);
279 
280     usepr = (fact == SamePattern_SameRowPerm);
281     if ( usepr ) {
282 	/* Compute the inverse of perm_r */
283 	iperm_r = (int *) intMalloc(m);
284 	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
285 	iperm_r_allocated = 1;
286     }
287 
288     iperm_c = (int *) intMalloc(n);
289     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
290     swap = (int *)intMalloc(n);
291     for (k = 0; k < n; k++) swap[k] = iperm_c[k];
292     iswap = (int *)intMalloc(n);
293     for (k = 0; k < n; k++) iswap[k] = perm_c[k];
294     amax = (float *) floatMalloc(panel_size);
295     if (drop_rule & DROP_SECONDARY)
296 	swork2 = (float *)floatMalloc(n);
297     else
298 	swork2 = NULL;
299 
300     nnzAj = 0;
301     nnzLj = 0;
302     nnzUj = 0;
303     last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
304     alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
305 
306     /* Identify relaxed snodes */
307     relax_end = (int *) intMalloc(n);
308     relax_fsupc = (int *) intMalloc(n);
309     if ( options->SymmetricMode == YES )
310 	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
311     else
312 	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
313 
314     ifill (perm_r, m, EMPTY);
315     ifill (marker, m * NO_MARKER, EMPTY);
316     supno[0] = -1;
317     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
318     w_def    = panel_size;
319 
320     /* Mark the rows used by relaxed supernodes */
321     ifill (marker_relax, m, EMPTY);
322     i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
323 	         asub, marker_relax);
324 #if ( PRNTlevel >= 1)
325     printf("%d relaxed supernodes.\n", i);
326 #endif
327 
328     /*
329      * Work on one "panel" at a time. A panel is one of the following:
330      *	   (a) a relaxed supernode at the bottom of the etree, or
331      *	   (b) panel_size contiguous columns, defined by the user
332      */
333     for (jcol = 0; jcol < min_mn; ) {
334 
335 	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
336 	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
337 	    panel_histo[kcol-jcol+1]++;
338 
339 	    /* Drop small rows in the previous supernode. */
340 	    if (jcol > 0 && jcol < last_drop) {
341 		int first = xsup[supno[jcol - 1]];
342 		int last = jcol - 1;
343 		int quota;
344 
345 		/* Compute the quota */
346 		if (drop_rule & DROP_PROWS)
347 		    quota = gamma * Astore->nnz / m * (m - first) / m
348 			    * (last - first + 1);
349 		else if (drop_rule & DROP_COLUMN) {
350 		    int i;
351 		    quota = 0;
352 		    for (i = first; i <= last; i++)
353 			quota += xa_end[i] - xa_begin[i];
354 		    quota = gamma * quota * (m - first) / m;
355 		} else if (drop_rule & DROP_AREA)
356 		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
357 			    - nnzLj;
358 		else
359 		    quota = m * n;
360 		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
361 
362 		/* Drop small rows */
363 		i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
364 				  &fill_tol, Glu, tempv, swork2, 0);
365 		/* Reset the parameters */
366 		if (drop_rule & DROP_DYNAMIC) {
367 		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
368 			     < nnzLj)
369 			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
370 		    else
371 			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
372 		}
373 		if (fill_tol < 0) iinfo -= (int)fill_tol;
374 #ifdef DEBUG
375 		num_drop_L += i * (last - first + 1);
376 #endif
377 	    }
378 
379 	    /* --------------------------------------
380 	     * Factorize the relaxed supernode(jcol:kcol)
381 	     * -------------------------------------- */
382 	    /* Determine the union of the row structure of the snode */
383 	    if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
384 					 marker, Glu)) != 0 )
385 		return;
386 
387 	    nextu    = xusub[jcol];
388 	    nextlu   = xlusup[jcol];
389 	    jsupno   = supno[jcol];
390 	    fsupc    = xsup[jsupno];
391 	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
392 	    nzlumax = Glu->nzlumax;
393 	    while ( new_next > nzlumax ) {
394 		if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
395 		    return;
396 	    }
397 
398 	    for (icol = jcol; icol <= kcol; icol++) {
399 		xusub[icol+1] = nextu;
400 
401 		amax[0] = 0.0;
402 		/* Scatter into SPA dense[*] */
403 		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
404 		    register float tmp = fabs(a[k]);
405 		    if (tmp > amax[0]) amax[0] = tmp;
406 		    dense[asub[k]] = a[k];
407 		}
408 		nnzAj += xa_end[icol] - xa_begin[icol];
409 		if (amax[0] == 0.0) {
410 		    amax[0] = fill_ini;
411 #if ( PRNTlevel >= 1)
412 		    printf("Column %d is entirely zero!\n", icol);
413 		    fflush(stdout);
414 #endif
415 		}
416 
417 		/* Numeric update within the snode */
418 		ssnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
419 
420 		if (usepr) pivrow = iperm_r[icol];
421 		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
422 		if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
423 					  perm_r, iperm_c[icol], swap, iswap,
424 					  marker_relax, &pivrow,
425                                           amax[0] * fill_tol, milu, zero,
426                                           Glu, stat)) ) {
427 		    iinfo++;
428 		    marker[pivrow] = kcol;
429 		}
430 
431 	    }
432 
433 	    jcol = kcol + 1;
434 
435 	} else { /* Work on one panel of panel_size columns */
436 
437 	    /* Adjust panel_size so that a panel won't overlap with the next
438 	     * relaxed snode.
439 	     */
440 	    panel_size = w_def;
441 	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
442 		if ( relax_end[k] != EMPTY ) {
443 		    panel_size = k - jcol;
444 		    break;
445 		}
446 	    if ( k == min_mn ) panel_size = min_mn - jcol;
447 	    panel_histo[panel_size]++;
448 
449 	    /* symbolic factor on a panel of columns */
450 	    ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
451                           dense, amax, panel_lsub, segrep, repfnz,
452                           marker, parent, xplore, Glu);
453 
454 	    /* numeric sup-panel updates in topological order */
455 	    spanel_bmod(m, panel_size, jcol, nseg1, dense,
456 			tempv, segrep, repfnz, Glu, stat);
457 
458 	    /* Sparse LU within the panel, and below panel diagonal */
459 	    for (jj = jcol; jj < jcol + panel_size; jj++) {
460 
461 		k = (jj - jcol) * m; /* column index for w-wide arrays */
462 
463 		nseg = nseg1;	/* Begin after all the panel segments */
464 
465 		nnzAj += xa_end[jj] - xa_begin[jj];
466 
467 		if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
468 					     &panel_lsub[k], segrep, &repfnz[k],
469 					     marker, parent, xplore, Glu)))
470 		    return;
471 
472 		/* Numeric updates */
473 		if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
474 					  tempv, &segrep[nseg1], &repfnz[k],
475 					  jcol, Glu, stat)) != 0) return;
476 
477 		/* Make a fill-in position if the column is entirely zero */
478 		if (xlsub[jj + 1] == xlsub[jj]) {
479 		    register int i, row;
480 		    int nextl;
481 		    int nzlmax = Glu->nzlmax;
482 		    int *lsub = Glu->lsub;
483 		    int *marker2 = marker + 2 * m;
484 
485 		    /* Allocate memory */
486 		    nextl = xlsub[jj] + 1;
487 		    if (nextl >= nzlmax) {
488 			int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, Glu);
489 			if (error) { *info = error; return; }
490 			lsub = Glu->lsub;
491 		    }
492 		    xlsub[jj + 1]++;
493 		    assert(xlusup[jj]==xlusup[jj+1]);
494 		    xlusup[jj + 1]++;
495 		    ((float *) Glu->lusup)[xlusup[jj]] = zero;
496 
497 		    /* Choose a row index (pivrow) for fill-in */
498 		    for (i = jj; i < n; i++)
499 			if (marker_relax[swap[i]] <= jj) break;
500 		    row = swap[i];
501 		    marker2[row] = jj;
502 		    lsub[xlsub[jj]] = row;
503 #ifdef DEBUG
504 		    printf("Fill col %d.\n", jj);
505 		    fflush(stdout);
506 #endif
507 		}
508 
509 		/* Computer the quota */
510 		if (drop_rule & DROP_PROWS)
511 		    quota = gamma * Astore->nnz / m * jj / m;
512 		else if (drop_rule & DROP_COLUMN)
513 		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
514 			    (jj + 1) / m;
515 		else if (drop_rule & DROP_AREA)
516 		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
517 		else
518 		    quota = m;
519 
520 		/* Copy the U-segments to ucol[*] and drop small entries */
521 		if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
522 					       perm_r, &dense[k], drop_rule,
523 					       milu, amax[jj - jcol] * tol_U,
524 					       quota, &drop_sum, &nnzUj, Glu,
525 					       swork2)) != 0)
526 		    return;
527 
528 		/* Reset the dropping threshold if required */
529 		if (drop_rule & DROP_DYNAMIC) {
530 		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
531 			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
532 		    else
533 			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
534 		}
535 
536 		if (drop_sum != zero)
537 		{
538 		    if (drop_sum > zero)
539 			omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
540 				* amax[jj - jcol] / drop_sum, one);
541 		    else
542 			omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
543 				* amax[jj - jcol] / drop_sum, -one);
544 		    drop_sum *= omega;
545                 }
546 		if (usepr) pivrow = iperm_r[jj];
547 		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
548 		if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
549 					  iperm_c[jj], swap, iswap,
550 					  marker_relax, &pivrow,
551 					  amax[jj - jcol] * fill_tol, milu,
552 					  drop_sum, Glu, stat)) ) {
553 		    iinfo++;
554 		    marker[m + pivrow] = jj;
555 		    marker[2 * m + pivrow] = jj;
556 		}
557 
558 		/* Reset repfnz[] for this column */
559 		resetrep_col (nseg, segrep, &repfnz[k]);
560 
561 		/* Start a new supernode, drop the previous one */
562 		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
563 		    int first = xsup[supno[jj - 1]];
564 		    int last = jj - 1;
565 		    int quota;
566 
567 		    /* Compute the quota */
568 		    if (drop_rule & DROP_PROWS)
569 			quota = gamma * Astore->nnz / m * (m - first) / m
570 				* (last - first + 1);
571 		    else if (drop_rule & DROP_COLUMN) {
572 			int i;
573 			quota = 0;
574 			for (i = first; i <= last; i++)
575 			    quota += xa_end[i] - xa_begin[i];
576 			quota = gamma * quota * (m - first) / m;
577 		    } else if (drop_rule & DROP_AREA)
578 			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
579 				/ m) - nnzLj;
580 		    else
581 			quota = m * n;
582 		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
583 			    (double)min_mn);
584 
585 		    /* Drop small rows */
586 		    i = ilu_sdrop_row(options, first, last, tol_L, quota,
587 				      &nnzLj, &fill_tol, Glu, tempv, swork2,
588 				      1);
589 
590 		    /* Reset the parameters */
591 		    if (drop_rule & DROP_DYNAMIC) {
592 			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
593 				< nnzLj)
594 			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
595 			else
596 			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
597 		    }
598 		    if (fill_tol < 0) iinfo -= (int)fill_tol;
599 #ifdef DEBUG
600 		    num_drop_L += i * (last - first + 1);
601 #endif
602 		} /* if start a new supernode */
603 
604 	    } /* for */
605 
606 	    jcol += panel_size; /* Move to the next panel */
607 
608 	} /* else */
609 
610     } /* for */
611 
612     *info = iinfo;
613 
614     if ( m > n ) {
615 	k = 0;
616 	for (i = 0; i < m; ++i)
617 	    if ( perm_r[i] == EMPTY ) {
618 		perm_r[i] = n + k;
619 		++k;
620 	    }
621     }
622 
623     ilu_countnz(min_mn, &nnzL, &nnzU, Glu);
624     fixupL(min_mn, perm_r, Glu);
625 
626     sLUWorkFree(iwork, swork, Glu); /* Free work space and compress storage */
627 
628     if ( fact == SamePattern_SameRowPerm ) {
629 	/* L and U structures may have changed due to possibly different
630 	   pivoting, even though the storage is available.
631 	   There could also be memory expansions, so the array locations
632 	   may have changed, */
633 	((SCformat *)L->Store)->nnz = nnzL;
634 	((SCformat *)L->Store)->nsuper = Glu->supno[n];
635 	((SCformat *)L->Store)->nzval = (float *) Glu->lusup;
636 	((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
637 	((SCformat *)L->Store)->rowind = Glu->lsub;
638 	((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
639 	((NCformat *)U->Store)->nnz = nnzU;
640 	((NCformat *)U->Store)->nzval = (float *) Glu->ucol;
641 	((NCformat *)U->Store)->rowind = Glu->usub;
642 	((NCformat *)U->Store)->colptr = Glu->xusub;
643     } else {
644 	sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
645               (float *) Glu->lusup, Glu->xlusup,
646               Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
647 	      SLU_SC, SLU_S, SLU_TRLU);
648 	sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
649 	      (float *) Glu->ucol, Glu->usub, Glu->xusub,
650 	      SLU_NC, SLU_S, SLU_TRU);
651     }
652 
653     ops[FACT] += ops[TRSV] + ops[GEMV];
654     stat->expansions = --(Glu->num_expansions);
655 
656     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
657     SUPERLU_FREE (iperm_c);
658     SUPERLU_FREE (relax_end);
659     SUPERLU_FREE (swap);
660     SUPERLU_FREE (iswap);
661     SUPERLU_FREE (relax_fsupc);
662     SUPERLU_FREE (amax);
663     if ( swork2 ) SUPERLU_FREE (swork2);
664 
665 }
666