1 /* ========================================================================== */
2 /* === KLU_kernel =========================================================== */
3 /* ========================================================================== */
4 
5 /* Sparse left-looking LU factorization, with partial pivoting.  Based on
6  * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat &
7  * Liu's symmetric pruning.  No user-callable routines are in this file.
8  */
9 
10 #include "klu_internal.h"
11 
12 /* ========================================================================== */
13 /* === dfs ================================================================== */
14 /* ========================================================================== */
15 
16 /* Does a depth-first-search, starting at node j. */
17 
dfs(Int j,Int k,Int Pinv[],Int Llen[],Int Lip[],Int Stack[],Int Flag[],Int Lpend[],Int top,Unit LU[],Int * Lik,Int * plength,Int Ap_pos[])18 static Int dfs
19 (
20     /* input, not modified on output: */
21     Int j,              /* node at which to start the DFS */
22     Int k,              /* mark value, for the Flag array */
23     Int Pinv [ ],       /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
24                          * row i is not yet pivotal.  */
25     Int Llen [ ],       /* size n, Llen [k] = # nonzeros in column k of L */
26     Int Lip [ ],        /* size n, Lip [k] is position in LU of column k of L */
27 
28     /* workspace, not defined on input or output */
29     Int Stack [ ],      /* size n */
30 
31     /* input/output: */
32     Int Flag [ ],       /* Flag [i] == k means i is marked */
33     Int Lpend [ ],      /* for symmetric pruning */
34     Int top,            /* top of stack on input*/
35     Unit LU [],
36     Int *Lik,           /* Li row index array of the kth column */
37     Int *plength,
38 
39     /* other, not defined on input or output */
40     Int Ap_pos [ ]      /* keeps track of position in adj list during DFS */
41 )
42 {
43     Int i, pos, jnew, head, l_length ;
44     Int *Li ;
45 
46     l_length = *plength ;
47 
48     head = 0 ;
49     Stack [0] = j ;
50     ASSERT (Flag [j] != k) ;
51 
52     while (head >= 0)
53     {
54         j = Stack [head] ;
55         jnew = Pinv [j] ;
56         ASSERT (jnew >= 0 && jnew < k) ;        /* j is pivotal */
57 
58         if (Flag [j] != k)          /* a node is not yet visited */
59         {
60             /* first time that j has been visited */
61             Flag [j] = k ;
62             PRINTF (("[ start dfs at %d : new %d\n", j, jnew)) ;
63             /* set Ap_pos [head] to one past the last entry in col j to scan */
64             Ap_pos [head] =
65                 (Lpend [jnew] == EMPTY) ?  Llen [jnew] : Lpend [jnew] ;
66         }
67 
68         /* add the adjacent nodes to the recursive stack by iterating through
69          * until finding another non-visited pivotal node */
70         Li = (Int *) (LU + Lip [jnew]) ;
71         for (pos = --Ap_pos [head] ; pos >= 0 ; --pos)
72         {
73             i = Li [pos] ;
74             if (Flag [i] != k)
75             {
76                 /* node i is not yet visited */
77                 if (Pinv [i] >= 0)
78                 {
79                     /* keep track of where we left off in the scan of the
80                      * adjacency list of node j so we can restart j where we
81                      * left off. */
82                     Ap_pos [head] = pos ;
83 
84                     /* node i is pivotal; push it onto the recursive stack
85                      * and immediately break so we can recurse on node i. */
86                     Stack [++head] = i ;
87                     break ;
88                 }
89                 else
90                 {
91                     /* node i is not pivotal (no outgoing edges). */
92                     /* Flag as visited and store directly into L,
93                      * and continue with current node j. */
94                     Flag [i] = k ;
95                     Lik [l_length] = i ;
96                     l_length++ ;
97                 }
98             }
99         }
100 
101         if (pos == -1)
102         {
103             /* if all adjacent nodes of j are already visited, pop j from
104              * recursive stack and push j onto output stack */
105             head-- ;
106             Stack[--top] = j ;
107             PRINTF (("  end   dfs at %d ] head : %d\n", j, head)) ;
108         }
109     }
110 
111     *plength = l_length ;
112     return (top) ;
113 }
114 
115 
116 /* ========================================================================== */
117 /* === lsolve_symbolic ====================================================== */
118 /* ========================================================================== */
119 
120 /* Finds the pattern of x, for the solution of Lx=b */
121 
lsolve_symbolic(Int n,Int k,Int Ap[],Int Ai[],Int Q[],Int Pinv[],Int Stack[],Int Flag[],Int Lpend[],Int Ap_pos[],Unit LU[],Int lup,Int Llen[],Int Lip[],Int k1,Int PSinv[])122 static Int lsolve_symbolic
123 (
124     /* input, not modified on output: */
125     Int n,              /* L is n-by-n, where n >= 0 */
126     Int k,              /* also used as the mark value, for the Flag array */
127     Int Ap [ ],
128     Int Ai [ ],
129     Int Q [ ],
130     Int Pinv [ ],       /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
131                          * is not yet pivotal.  */
132 
133     /* workspace, not defined on input or output */
134     Int Stack [ ],      /* size n */
135 
136     /* workspace, defined on input and output */
137     Int Flag [ ],       /* size n.  Initially, all of Flag [0..n-1] < k.  After
138                          * lsolve_symbolic is done, Flag [i] == k if i is in
139                          * the pattern of the output, and Flag [0..n-1] <= k. */
140 
141     /* other */
142     Int Lpend [ ],      /* for symmetric pruning */
143     Int Ap_pos [ ],     /* workspace used in dfs */
144 
145     Unit LU [ ],        /* LU factors (pattern and values) */
146     Int lup,            /* pointer to free space in LU */
147     Int Llen [ ],       /* size n, Llen [k] = # nonzeros in column k of L */
148     Int Lip [ ],        /* size n, Lip [k] is position in LU of column k of L */
149 
150     /* ---- the following are only used in the BTF case --- */
151 
152     Int k1,             /* the block of A is from k1 to k2-1 */
153     Int PSinv [ ]       /* inverse of P from symbolic factorization */
154 )
155 {
156     Int *Lik ;
157     Int i, p, pend, oldcol, kglobal, top, l_length ;
158 
159     top = n ;
160     l_length = 0 ;
161     Lik = (Int *) (LU + lup);
162 
163     /* ---------------------------------------------------------------------- */
164     /* BTF factorization of A (k1:k2-1, k1:k2-1) */
165     /* ---------------------------------------------------------------------- */
166 
167     kglobal = k + k1 ;  /* column k of the block is col kglobal of A */
168     oldcol = Q [kglobal] ;      /* Q must be present for BTF case */
169     pend = Ap [oldcol+1] ;
170     for (p = Ap [oldcol] ; p < pend ; p++)
171     {
172         i = PSinv [Ai [p]] - k1 ;
173         if (i < 0) continue ;   /* skip entry outside the block */
174 
175         /* (i,k) is an entry in the block.  start a DFS at node i */
176         PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
177         if (Flag [i] != k)
178         {
179             if (Pinv [i] >= 0)
180             {
181                 top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
182                            Lpend, top, LU, Lik, &l_length, Ap_pos) ;
183             }
184             else
185             {
186                 /* i is not pivotal, and not flagged. Flag and put in L */
187                 Flag [i] = k ;
188                 Lik [l_length] = i ;
189                 l_length++;
190             }
191         }
192     }
193 
194     /* If Llen [k] is zero, the matrix is structurally singular */
195     Llen [k] = l_length ;
196     return (top) ;
197 }
198 
199 
200 /* ========================================================================== */
201 /* === construct_column ===================================================== */
202 /* ========================================================================== */
203 
204 /* Construct the kth column of A, and the off-diagonal part, if requested.
205  * Scatter the numerical values into the workspace X, and construct the
206  * corresponding column of the off-diagonal matrix. */
207 
construct_column(Int k,Int Ap[],Int Ai[],Entry Ax[],Int Q[],Entry X[],Int k1,Int PSinv[],double Rs[],Int scale,Int Offp[],Int Offi[],Entry Offx[])208 static void construct_column
209 (
210     /* inputs, not modified on output */
211     Int k,          /* the column of A (or the column of the block) to get */
212     Int Ap [ ],
213     Int Ai [ ],
214     Entry Ax [ ],
215     Int Q [ ],      /* column pre-ordering */
216 
217     /* zero on input, modified on output */
218     Entry X [ ],
219 
220     /* ---- the following are only used in the BTF case --- */
221 
222     /* inputs, not modified on output */
223     Int k1,         /* the block of A is from k1 to k2-1 */
224     Int PSinv [ ],  /* inverse of P from symbolic factorization */
225     double Rs [ ],  /* scale factors for A */
226     Int scale,      /* 0: no scaling, nonzero: scale the rows with Rs */
227 
228     /* inputs, modified on output */
229     Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
230     Int Offi [ ],
231     Entry Offx [ ]
232 )
233 {
234     Entry aik ;
235     Int i, p, pend, oldcol, kglobal, poff, oldrow ;
236 
237     /* ---------------------------------------------------------------------- */
238     /* Scale and scatter the column into X. */
239     /* ---------------------------------------------------------------------- */
240 
241     kglobal = k + k1 ;          /* column k of the block is col kglobal of A */
242     poff = Offp [kglobal] ;     /* start of off-diagonal column */
243     oldcol = Q [kglobal] ;
244     pend = Ap [oldcol+1] ;
245 
246     if (scale <= 0)
247     {
248         /* no scaling */
249         for (p = Ap [oldcol] ; p < pend ; p++)
250         {
251             oldrow = Ai [p] ;
252             i = PSinv [oldrow] - k1 ;
253             aik = Ax [p] ;
254             if (i < 0)
255             {
256                 /* this is an entry in the off-diagonal part */
257                 Offi [poff] = oldrow ;
258                 Offx [poff] = aik ;
259                 poff++ ;
260             }
261             else
262             {
263                 /* (i,k) is an entry in the block.  scatter into X */
264                 X [i] = aik ;
265             }
266         }
267     }
268     else
269     {
270         /* row scaling */
271         for (p = Ap [oldcol] ; p < pend ; p++)
272         {
273             oldrow = Ai [p] ;
274             i = PSinv [oldrow] - k1 ;
275             aik = Ax [p] ;
276             SCALE_DIV (aik, Rs [oldrow]) ;
277             if (i < 0)
278             {
279                 /* this is an entry in the off-diagonal part */
280                 Offi [poff] = oldrow ;
281                 Offx [poff] = aik ;
282                 poff++ ;
283             }
284             else
285             {
286                 /* (i,k) is an entry in the block.  scatter into X */
287                 X [i] = aik ;
288             }
289         }
290     }
291 
292     Offp [kglobal+1] = poff ;   /* start of the next col of off-diag part */
293 }
294 
295 
296 /* ========================================================================== */
297 /* === lsolve_numeric ======================================================= */
298 /* ========================================================================== */
299 
300 /* Computes the numerical values of x, for the solution of Lx=b.  Note that x
301  * may include explicit zeros if numerical cancelation occurs.  L is assumed
302  * to be unit-diagonal, with possibly unsorted columns (but the first entry in
303  * the column must always be the diagonal entry). */
304 
lsolve_numeric(Int Pinv[],Unit * LU,Int Stack[],Int Lip[],Int top,Int n,Int Llen[],Entry X[])305 static void lsolve_numeric
306 (
307     /* input, not modified on output: */
308     Int Pinv [ ],       /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i
309                          * is not yet pivotal.  */
310     Unit *LU,           /* LU factors (pattern and values) */
311     Int Stack [ ],      /* stack for dfs */
312     Int Lip [ ],        /* size n, Lip [k] is position in LU of column k of L */
313     Int top,            /* top of stack on input */
314     Int n,              /* A is n-by-n */
315     Int Llen [ ],       /* size n, Llen [k] = # nonzeros in column k of L */
316 
317     /* output, must be zero on input: */
318     Entry X [ ] /* size n, initially zero.  On output,
319                  * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
320                  * contains the solution. */
321 
322 )
323 {
324     Entry xj ;
325     Entry *Lx ;
326     Int *Li ;
327     Int p, s, j, jnew, len ;
328 
329     /* solve Lx=b */
330     for (s = top ; s < n ; s++)
331     {
332         /* forward solve with column j of L */
333         j = Stack [s] ;
334         jnew = Pinv [j] ;
335         ASSERT (jnew >= 0) ;
336         xj = X [j] ;
337         GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ;
338         ASSERT (Lip [jnew] <= Lip [jnew+1]) ;
339         for (p = 0 ; p < len ; p++)
340         {
341             /*X [Li [p]] -= Lx [p] * xj ; */
342             MULT_SUB (X [Li [p]], Lx [p], xj) ;
343         }
344     }
345 }
346 
347 
348 /* ========================================================================== */
349 /* === lpivot =============================================================== */
350 /* ========================================================================== */
351 
352 /* Find a pivot via partial pivoting, and scale the column of L. */
353 
lpivot(Int diagrow,Int * p_pivrow,Entry * p_pivot,double * p_abs_pivot,double tol,Entry X[],Unit * LU,Int Lip[],Int Llen[],Int k,Int n,Int Pinv[],Int * p_firstrow,KLU_common * Common)354 static Int lpivot
355 (
356     Int diagrow,
357     Int *p_pivrow,
358     Entry *p_pivot,
359     double *p_abs_pivot,
360     double tol,
361     Entry X [ ],
362     Unit *LU,           /* LU factors (pattern and values) */
363     Int Lip [ ],
364     Int Llen [ ],
365     Int k,
366     Int n,
367 
368     Int Pinv [ ],       /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
369                          * row i is not yet pivotal.  */
370 
371     Int *p_firstrow,
372     KLU_common *Common
373 )
374 {
375     Entry x, pivot, *Lx ;
376     double abs_pivot, xabs ;
377     Int p, i, ppivrow, pdiag, pivrow, *Li, last_row_index, firstrow, len ;
378 
379     pivrow = EMPTY ;
380     if (Llen [k] == 0)
381     {
382         /* matrix is structurally singular */
383         if (Common->halt_if_singular)
384         {
385             return (FALSE) ;
386         }
387         for (firstrow = *p_firstrow ; firstrow < n ; firstrow++)
388         {
389             PRINTF (("check %d\n", firstrow)) ;
390             if (Pinv [firstrow] < 0)
391             {
392                 /* found the lowest-numbered non-pivotal row.  Pick it. */
393                 pivrow = firstrow ;
394                 PRINTF (("Got pivotal row: %d\n", pivrow)) ;
395                 break ;
396             }
397         }
398         ASSERT (pivrow >= 0 && pivrow < n) ;
399         CLEAR (pivot) ;
400         *p_pivrow = pivrow ;
401         *p_pivot = pivot ;
402         *p_abs_pivot = 0 ;
403         *p_firstrow = firstrow ;
404         return (FALSE) ;
405     }
406 
407     pdiag = EMPTY ;
408     ppivrow = EMPTY ;
409     abs_pivot = EMPTY ;
410     i = Llen [k] - 1 ;
411     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
412     last_row_index = Li [i] ;
413 
414     /* decrement the length by 1 */
415     Llen [k] = i ;
416     GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
417 
418     /* look in Li [0 ..Llen [k] - 1 ] for a pivot row */
419     for (p = 0 ; p < len ; p++)
420     {
421         /* gather the entry from X and store in L */
422         i = Li [p] ;
423         x = X [i] ;
424         CLEAR (X [i]) ;
425 
426         Lx [p] = x ;
427         /* xabs = ABS (x) ; */
428         ABS (xabs, x) ;
429 
430         /* find the diagonal */
431         if (i == diagrow)
432         {
433             pdiag = p ;
434         }
435 
436         /* find the partial-pivoting choice */
437         if (xabs > abs_pivot)
438         {
439             abs_pivot = xabs ;
440             ppivrow = p ;
441         }
442     }
443 
444     /* xabs = ABS (X [last_row_index]) ;*/
445     ABS (xabs, X [last_row_index]) ;
446     if (xabs > abs_pivot)
447     {
448         abs_pivot = xabs ;
449         ppivrow = EMPTY ;
450     }
451 
452     /* compare the diagonal with the largest entry */
453     if (last_row_index == diagrow)
454     {
455         if (xabs >= tol * abs_pivot)
456         {
457             abs_pivot = xabs ;
458             ppivrow = EMPTY ;
459         }
460     }
461     else if (pdiag != EMPTY)
462     {
463         /* xabs = ABS (Lx [pdiag]) ;*/
464         ABS (xabs, Lx [pdiag]) ;
465         if (xabs >= tol * abs_pivot)
466         {
467             /* the diagonal is large enough */
468             abs_pivot = xabs ;
469             ppivrow = pdiag ;
470         }
471     }
472 
473     if (ppivrow != EMPTY)
474     {
475         pivrow = Li [ppivrow] ;
476         pivot  = Lx [ppivrow] ;
477         /* overwrite the ppivrow values with last index values */
478         Li [ppivrow] = last_row_index ;
479         Lx [ppivrow] = X [last_row_index] ;
480     }
481     else
482     {
483         pivrow = last_row_index ;
484         pivot = X [last_row_index] ;
485     }
486     CLEAR (X [last_row_index]) ;
487 
488     *p_pivrow = pivrow ;
489     *p_pivot = pivot ;
490     *p_abs_pivot = abs_pivot ;
491     ASSERT (pivrow >= 0 && pivrow < n) ;
492 
493     if (IS_ZERO (pivot) && Common->halt_if_singular)
494     {
495         /* numerically singular case */
496         return (FALSE) ;
497     }
498 
499     /* divide L by the pivot value */
500     for (p = 0 ; p < Llen [k] ; p++)
501     {
502         /* Lx [p] /= pivot ; */
503         DIV (Lx [p], Lx [p], pivot) ;
504     }
505 
506     return (TRUE) ;
507 }
508 
509 
510 /* ========================================================================== */
511 /* === prune ================================================================ */
512 /* ========================================================================== */
513 
514 /* Prune the columns of L to reduce work in subsequent depth-first searches */
prune(Int Lpend[],Int Pinv[],Int k,Int pivrow,Unit * LU,Int Uip[],Int Lip[],Int Ulen[],Int Llen[])515 static void prune
516 (
517     /* input/output: */
518     Int Lpend [ ],      /* Lpend [j] marks symmetric pruning point for L(:,j) */
519 
520     /* input: */
521     Int Pinv [ ],       /* Pinv [i] = k if row i is kth pivot row, or EMPTY if
522                          * row i is not yet pivotal.  */
523     Int k,              /* prune using column k of U */
524     Int pivrow,         /* current pivot row */
525 
526     /* input/output: */
527     Unit *LU,           /* LU factors (pattern and values) */
528 
529     /* input */
530     Int Uip [ ],        /* size n, column pointers for U */
531     Int Lip [ ],        /* size n, column pointers for L */
532     Int Ulen [ ],       /* size n, column length of U */
533     Int Llen [ ]        /* size n, column length of L */
534 )
535 {
536     Entry x ;
537     Entry *Lx, *Ux ;
538     Int *Li, *Ui ;
539     Int p, i, j, p2, phead, ptail, llen, ulen ;
540 
541     /* check to see if any column of L can be pruned */
542     /* Ux is set but not used.  This OK. */
543     GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
544     for (p = 0 ; p < ulen ; p++)
545     {
546         j = Ui [p] ;
547         ASSERT (j < k) ;
548         PRINTF (("%d is pruned: %d. Lpend[j] %d Lip[j+1] %d\n",
549             j, Lpend [j] != EMPTY, Lpend [j], Lip [j+1])) ;
550         if (Lpend [j] == EMPTY)
551         {
552             /* scan column j of L for the pivot row */
553             GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
554             for (p2 = 0 ; p2 < llen ; p2++)
555             {
556                 if (pivrow == Li [p2])
557                 {
558                     /* found it!  This column can be pruned */
559 #ifndef NDEBUG
560                     PRINTF (("==== PRUNE: col j %d of L\n", j)) ;
561                     {
562                         Int p3 ;
563                         for (p3 = 0 ; p3 < Llen [j] ; p3++)
564                         {
565                             PRINTF (("before: %i  pivotal: %d\n", Li [p3],
566                                         Pinv [Li [p3]] >= 0)) ;
567                         }
568                     }
569 #endif
570 
571                     /* partition column j of L.  The unit diagonal of L
572                      * is not stored in the column of L. */
573                     phead = 0 ;
574                     ptail = Llen [j] ;
575                     while (phead < ptail)
576                     {
577                         i = Li [phead] ;
578                         if (Pinv [i] >= 0)
579                         {
580                             /* leave at the head */
581                             phead++ ;
582                         }
583                         else
584                         {
585                             /* swap with the tail */
586                             ptail-- ;
587                             Li [phead] = Li [ptail] ;
588                             Li [ptail] = i ;
589                             x = Lx [phead] ;
590                             Lx [phead] = Lx [ptail] ;
591                             Lx [ptail] = x ;
592                         }
593                     }
594 
595                     /* set Lpend to one past the last entry in the
596                      * first part of the column of L.  Entries in
597                      * Li [0 ... Lpend [j]-1] are the only part of
598                      * column j of L that needs to be scanned in the DFS.
599                      * Lpend [j] was EMPTY; setting it >= 0 also flags
600                      * column j as pruned. */
601                     Lpend [j] = ptail ;
602 
603 #ifndef NDEBUG
604                     {
605                         Int p3 ;
606                         for (p3 = 0 ; p3 < Llen [j] ; p3++)
607                         {
608                             if (p3 == Lpend [j]) PRINTF (("----\n")) ;
609                             PRINTF (("after: %i  pivotal: %d\n", Li [p3],
610                                         Pinv [Li [p3]] >= 0)) ;
611                         }
612                     }
613 #endif
614 
615                     break ;
616                 }
617             }
618         }
619     }
620 }
621 
622 
623 /* ========================================================================== */
624 /* === KLU_kernel =========================================================== */
625 /* ========================================================================== */
626 
KLU_kernel(Int n,Int Ap[],Int Ai[],Entry Ax[],Int Q[],size_t lusize,Int Pinv[],Int P[],Unit ** p_LU,Entry Udiag[],Int Llen[],Int Ulen[],Int Lip[],Int Uip[],Int * lnz,Int * unz,Entry X[],Int Stack[],Int Flag[],Int Ap_pos[],Int Lpend[],Int k1,Int PSinv[],double Rs[],Int Offp[],Int Offi[],Entry Offx[],KLU_common * Common)627 size_t KLU_kernel   /* final size of LU on output */
628 (
629     /* input, not modified */
630     Int n,          /* A is n-by-n */
631     Int Ap [ ],     /* size n+1, column pointers for A */
632     Int Ai [ ],     /* size nz = Ap [n], row indices for A */
633     Entry Ax [ ],   /* size nz, values of A */
634     Int Q [ ],      /* size n, optional input permutation */
635     size_t lusize,  /* initial size of LU on input */
636 
637     /* output, not defined on input */
638     Int Pinv [ ],   /* size n, inverse row permutation, where Pinv [i] = k if
639                      * row i is the kth pivot row */
640     Int P [ ],      /* size n, row permutation, where P [k] = i if row i is the
641                      * kth pivot row. */
642     Unit **p_LU,        /* LU array, size lusize on input */
643     Entry Udiag [ ],    /* size n, diagonal of U */
644     Int Llen [ ],       /* size n, column length of L */
645     Int Ulen [ ],       /* size n, column length of U */
646     Int Lip [ ],        /* size n, column pointers for L */
647     Int Uip [ ],        /* size n, column pointers for U */
648     Int *lnz,           /* size of L*/
649     Int *unz,           /* size of U*/
650     /* workspace, not defined on input */
651     Entry X [ ],    /* size n, undefined on input, zero on output */
652 
653     /* workspace, not defined on input or output */
654     Int Stack [ ],  /* size n */
655     Int Flag [ ],   /* size n */
656     Int Ap_pos [ ],     /* size n */
657 
658     /* other workspace: */
659     Int Lpend [ ],                  /* size n workspace, for pruning only */
660 
661     /* inputs, not modified on output */
662     Int k1,             /* the block of A is from k1 to k2-1 */
663     Int PSinv [ ],      /* inverse of P from symbolic factorization */
664     double Rs [ ],      /* scale factors for A */
665 
666     /* inputs, modified on output */
667     Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
668     Int Offi [ ],
669     Entry Offx [ ],
670     /* --------------- */
671     KLU_common *Common
672 )
673 {
674     Entry pivot ;
675     double abs_pivot, xsize, nunits, tol, memgrow ;
676     Entry *Ux ;
677     Int *Li, *Ui ;
678     Unit *LU ;          /* LU factors (pattern and values) */
679     Int k, p, i, j, pivrow = 0, kbar, diagrow, firstrow, lup, top, scale, len ;
680     size_t newlusize ;
681 
682 #ifndef NDEBUG
683     Entry *Lx ;
684 #endif
685 
686     ASSERT (Common != NULL) ;
687     scale = Common->scale ;
688     tol = Common->tol ;
689     memgrow = Common->memgrow ;
690     *lnz = 0 ;
691     *unz = 0 ;
692     CLEAR (pivot) ;
693 
694     /* ---------------------------------------------------------------------- */
695     /* get initial Li, Lx, Ui, and Ux */
696     /* ---------------------------------------------------------------------- */
697 
698     PRINTF (("input: lusize %d \n", lusize)) ;
699     ASSERT (lusize > 0) ;
700     LU = *p_LU ;
701 
702     /* ---------------------------------------------------------------------- */
703     /* initializations */
704     /* ---------------------------------------------------------------------- */
705 
706     firstrow = 0 ;
707     lup = 0 ;
708 
709     for (k = 0 ; k < n ; k++)
710     {
711         /* X [k] = 0 ; */
712         CLEAR (X [k]) ;
713         Flag [k] = EMPTY ;
714         Lpend [k] = EMPTY ;     /* flag k as not pruned */
715     }
716 
717     /* ---------------------------------------------------------------------- */
718     /* mark all rows as non-pivotal and determine initial diagonal mapping */
719     /* ---------------------------------------------------------------------- */
720 
721     /* PSinv does the symmetric permutation, so don't do it here */
722     for (k = 0 ; k < n ; k++)
723     {
724         P [k] = k ;
725         Pinv [k] = FLIP (k) ;   /* mark all rows as non-pivotal */
726     }
727     /* initialize the construction of the off-diagonal matrix */
728     Offp [0] = 0 ;
729 
730     /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa.
731      * If row is pivotal, then Pinv [row] >= 0.  A row is initially "flipped"
732      * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes
733      * pivotal. */
734 
735 #ifndef NDEBUG
736     for (k = 0 ; k < n ; k++)
737     {
738         PRINTF (("Initial P [%d] = %d\n", k, P [k])) ;
739     }
740 #endif
741 
742     /* ---------------------------------------------------------------------- */
743     /* factorize */
744     /* ---------------------------------------------------------------------- */
745 
746     for (k = 0 ; k < n ; k++)
747     {
748 
749         PRINTF (("\n\n==================================== k: %d\n", k)) ;
750 
751         /* ------------------------------------------------------------------ */
752         /* determine if LU factors have grown too big */
753         /* ------------------------------------------------------------------ */
754 
755         /* (n - k) entries for L and k entries for U */
756         nunits = DUNITS (Int, n - k) + DUNITS (Int, k) +
757                  DUNITS (Entry, n - k) + DUNITS (Entry, k) ;
758 
759         /* LU can grow by at most 'nunits' entries if the column is dense */
760         PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize,
761             lup+nunits));
762         xsize = ((double) lup) + nunits ;
763         if (xsize > (double) lusize)
764         {
765             /* check here how much to grow */
766             xsize = (memgrow * ((double) lusize) + 4*n + 1) ;
767             if (INT_OVERFLOW (xsize))
768             {
769                 PRINTF (("Matrix is too large (Int overflow)\n")) ;
770                 Common->status = KLU_TOO_LARGE ;
771                 return (lusize) ;
772             }
773             newlusize = memgrow * lusize + 2*n + 1 ;
774             /* Future work: retry mechanism in case of malloc failure */
775             LU = KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
776             Common->nrealloc++ ;
777             *p_LU = LU ;
778             if (Common->status == KLU_OUT_OF_MEMORY)
779             {
780                 PRINTF (("Matrix is too large (LU)\n")) ;
781                 return (lusize) ;
782             }
783             lusize = newlusize ;
784             PRINTF (("inc LU to %d done\n", lusize)) ;
785         }
786 
787         /* ------------------------------------------------------------------ */
788         /* start the kth column of L and U */
789         /* ------------------------------------------------------------------ */
790 
791         Lip [k] = lup ;
792 
793         /* ------------------------------------------------------------------ */
794         /* compute the nonzero pattern of the kth column of L and U */
795         /* ------------------------------------------------------------------ */
796 
797 #ifndef NDEBUG
798         for (i = 0 ; i < n ; i++)
799         {
800             ASSERT (Flag [i] < k) ;
801             /* ASSERT (X [i] == 0) ; */
802             ASSERT (IS_ZERO (X [i])) ;
803         }
804 #endif
805 
806         top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
807                     Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
808 
809 #ifndef NDEBUG
810         PRINTF (("--- in U:\n")) ;
811         for (p = top ; p < n ; p++)
812         {
813             PRINTF (("pattern of X for U: %d : %d pivot row: %d\n",
814                 p, Stack [p], Pinv [Stack [p]])) ;
815             ASSERT (Flag [Stack [p]] == k) ;
816         }
817         PRINTF (("--- in L:\n")) ;
818         Li = (Int *) (LU + Lip [k]);
819         for (p = 0 ; p < Llen [k] ; p++)
820         {
821             PRINTF (("pattern of X in L: %d : %d pivot row: %d\n",
822                 p, Li [p], Pinv [Li [p]])) ;
823             ASSERT (Flag [Li [p]] == k) ;
824         }
825         p = 0 ;
826         for (i = 0 ; i < n ; i++)
827         {
828             ASSERT (Flag [i] <= k) ;
829             if (Flag [i] == k) p++ ;
830         }
831 #endif
832 
833         /* ------------------------------------------------------------------ */
834         /* get the column of the matrix to factorize and scatter into X */
835         /* ------------------------------------------------------------------ */
836 
837         construct_column (k, Ap, Ai, Ax, Q, X,
838             k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
839 
840         /* ------------------------------------------------------------------ */
841         /* compute the numerical values of the kth column (s = L \ A (:,k)) */
842         /* ------------------------------------------------------------------ */
843 
844         lsolve_numeric (Pinv, LU, Stack, Lip, top, n, Llen, X) ;
845 
846 #ifndef NDEBUG
847         for (p = top ; p < n ; p++)
848         {
849             PRINTF (("X for U %d : ",  Stack [p])) ;
850             PRINT_ENTRY (X [Stack [p]]) ;
851         }
852         Li = (Int *) (LU + Lip [k]) ;
853         for (p = 0 ; p < Llen [k] ; p++)
854         {
855             PRINTF (("X for L %d : ", Li [p])) ;
856             PRINT_ENTRY (X [Li [p]]) ;
857         }
858 #endif
859 
860         /* ------------------------------------------------------------------ */
861         /* partial pivoting with diagonal preference */
862         /* ------------------------------------------------------------------ */
863 
864         /* determine what the "diagonal" is */
865         diagrow = P [k] ;   /* might already be pivotal */
866         PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n",
867             k, diagrow, UNFLIP (diagrow))) ;
868 
869         /* find a pivot and scale the pivot column */
870         if (!lpivot (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip,
871                     Llen, k, n, Pinv, &firstrow, Common))
872         {
873             /* matrix is structurally or numerically singular */
874             Common->status = KLU_SINGULAR ;
875             if (Common->numerical_rank == EMPTY)
876             {
877                 Common->numerical_rank = k+k1 ;
878                 Common->singular_col = Q [k+k1] ;
879             }
880             if (Common->halt_if_singular)
881             {
882                 /* do not continue the factorization */
883                 return (lusize) ;
884             }
885         }
886 
887         /* we now have a valid pivot row, even if the column has NaN's or
888          * has no entries on or below the diagonal at all. */
889         PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ;
890         PRINT_ENTRY (pivot) ;
891         ASSERT (pivrow >= 0 && pivrow < n) ;
892         ASSERT (Pinv [pivrow] < 0) ;
893 
894         /* set the Uip pointer */
895         Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
896 
897         /* move the lup pointer to the position where indices of U
898          * should be stored */
899         lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
900 
901         Ulen [k] = n - top ;
902 
903         /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */
904         GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
905         for (p = top, i = 0 ; p < n ; p++, i++)
906         {
907             j = Stack [p] ;
908             Ui [i] = Pinv [j] ;
909             Ux [i] = X [j] ;
910             CLEAR (X [j]) ;
911         }
912 
913         /* position the lu index at the starting point for next column */
914         lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ;
915 
916         /* U(k,k) = pivot */
917         Udiag [k] = pivot ;
918 
919         /* ------------------------------------------------------------------ */
920         /* log the pivot permutation */
921         /* ------------------------------------------------------------------ */
922 
923         ASSERT (UNFLIP (Pinv [diagrow]) < n) ;
924         ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ;
925 
926         if (pivrow != diagrow)
927         {
928             /* an off-diagonal pivot has been chosen */
929             Common->noffdiag++ ;
930             PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n",
931                         pivrow, k)) ;
932             if (Pinv [diagrow] < 0)
933             {
934                 /* the former diagonal row index, diagrow, has not yet been
935                  * chosen as a pivot row.  Log this diagrow as the "diagonal"
936                  * entry in the column kbar for which the chosen pivot row,
937                  * pivrow, was originally logged as the "diagonal" */
938                 kbar = FLIP (Pinv [pivrow]) ;
939                 P [kbar] = diagrow ;
940                 Pinv [diagrow] = FLIP (kbar) ;
941             }
942         }
943         P [k] = pivrow ;
944         Pinv [pivrow] = k ;
945 
946 #ifndef NDEBUG
947         for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;}
948         GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
949         for (p = 0 ; p < len ; p++)
950         {
951             PRINTF (("Column %d of U: %d : ", k, Ui [p])) ;
952             PRINT_ENTRY (Ux [p]) ;
953         }
954         GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
955         for (p = 0 ; p < len ; p++)
956         {
957             PRINTF (("Column %d of L: %d : ", k, Li [p])) ;
958             PRINT_ENTRY (Lx [p]) ;
959         }
960 #endif
961 
962         /* ------------------------------------------------------------------ */
963         /* symmetric pruning */
964         /* ------------------------------------------------------------------ */
965 
966         prune (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;
967 
968         *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */
969         *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */
970     }
971 
972     /* ---------------------------------------------------------------------- */
973     /* finalize column pointers for L and U, and put L in the pivotal order */
974     /* ---------------------------------------------------------------------- */
975 
976     for (p = 0 ; p < n ; p++)
977     {
978         Li = (Int *) (LU + Lip [p]) ;
979         for (i = 0 ; i < Llen [p] ; i++)
980         {
981             Li [i] = Pinv [Li [i]] ;
982         }
983     }
984 
985 #ifndef NDEBUG
986     for (i = 0 ; i < n ; i++)
987     {
988         PRINTF (("P [%d] = %d   Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ;
989     }
990     for (i = 0 ; i < n ; i++)
991     {
992         ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ;
993         ASSERT (P [i] >= 0 && P [i] < n) ;
994         ASSERT (P [Pinv [i]] == i) ;
995         ASSERT (IS_ZERO (X [i])) ;
996     }
997 #endif
998 
999     /* ---------------------------------------------------------------------- */
1000     /* shrink the LU factors to just the required size */
1001     /* ---------------------------------------------------------------------- */
1002 
1003     newlusize = lup ;
1004     ASSERT ((size_t) newlusize <= lusize) ;
1005 
1006     /* this cannot fail, since the block is descreasing in size */
1007     LU = KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
1008     *p_LU = LU ;
1009     return (newlusize) ;
1010 }
1011