1 /* ========================================================================== */
2 /* === klu ================================================================== */
3 /* ========================================================================== */
4 
5 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
6  * optional symmetric pruning by Eisenstat and Liu [2].  The code is by Tim
7  * Davis.  This algorithm is what appears as the default sparse LU routine in
8  * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
9  * Note that no column ordering is provided (see COLAMD or AMD for suitable
10  * orderings).  SuperLU is based on this algorithm, except that it adds the
11  * use of dense matrix operations on "supernodes" (adjacent columns with
12  * identical).  This code doesn't use supernodes, thus its name ("Kent" LU,
13  * as in "Clark Kent", in contrast with Super-LU...).  This algorithm is slower
14  * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
15  * factors (such as for most finite-element problems).  However, for matrices
16  * with very sparse LU factors, this algorithm is typically faster than both
17  * SuperLU and UMFPACK, since in this case there is little chance to exploit
18  * dense matrix kernels (the BLAS).
19  *
20  * Only one block of A is factorized, in the BTF form.  The input n is the
21  * size of the block; k1 is the first row and column in the block.
22  *
23  * NOTE: no error checking is done on the inputs.  This version is not meant to
24  * be called directly by the user.  Use klu_factor instead.
25  *
26  * No fill-reducing ordering is provided.  The ordering quality of
27  * klu_kernel_factor is the responsibility of the caller.  The input A must
28  * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
29  * be provided.
30  *
31  * The input matrix A must be in compressed-column form, with either sorted
32  * or unsorted row indices.  Row indices for column j of A is in
33  * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
34  * numerical values.  No duplicate entries are allowed.
35  *
36  * Copyright 2004-2009, Tim Davis.  All rights reserved.  See the README
37  * file for details on permitted use.  Note that no code from The MathWorks,
38  * Inc, or from SuperLU, or from any other source appears here.  The code is
39  * written from scratch, from the algorithmic description in Gilbert & Peierls'
40  * and Eisenstat & Liu's journal papers [1,2].
41  *
42  * If an input permutation Q is provided, the factorization L*U = A (P,Q)
43  * is computed, where P is determined by partial pivoting, and Q is the input
44  * ordering.  If the pivot tolerance is less than 1, the "diagonal" entry that
45  * KLU attempts to choose is the diagonal of A (Q,Q).  In other words, the
46  * input permutation is applied symmetrically to the input matrix.  The output
47  * permutation P includes both the partial pivoting ordering and the input
48  * permutation.  If Q is NULL, then it is assumed to be the identity
49  * permutation.  Q is not modified.
50  *
51  * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
52  *      Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
53  *      vol 9, pp.  862-874, 1988.
54  * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
55  *      Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
56  *      Applic., vol 13, pp.  202-211, 1992.
57  */
58 
59 /* ========================================================================== */
60 
61 #include "klu_internal.h"
62 
KLU_kernel_factor(Int n,Int Ap[],Int Ai[],Entry Ax[],Int Q[],double Lsize,Unit ** p_LU,Entry Udiag[],Int Llen[],Int Ulen[],Int Lip[],Int Uip[],Int P[],Int * lnz,Int * unz,Entry * X,Int * Work,Int k1,Int PSinv[],double Rs[],Int Offp[],Int Offi[],Entry Offx[],KLU_common * Common)63 size_t KLU_kernel_factor            /* 0 if failure, size of LU if OK */
64 (
65     /* inputs, not modified */
66     Int n,          /* A is n-by-n. n must be > 0. */
67     Int Ap [ ],     /* size n+1, column pointers for A */
68     Int Ai [ ],     /* size nz = Ap [n], row indices for A */
69     Entry Ax [ ],   /* size nz, values of A */
70     Int Q [ ],      /* size n, optional column permutation */
71     double Lsize,   /* estimate of number of nonzeros in L */
72 
73     /* outputs, not defined on input */
74     Unit **p_LU,        /* row indices and values of L and U */
75     Entry Udiag [ ],    /* size n, diagonal of U */
76     Int Llen [ ],       /* size n, column length of L */
77     Int Ulen [ ],       /* size n, column length of U */
78     Int Lip [ ],        /* size n, column pointers for L */
79     Int Uip [ ],        /* size n, column pointers for U */
80     Int P [ ],          /* row permutation, size n */
81     Int *lnz,           /* size of L */
82     Int *unz,           /* size of U */
83 
84     /* workspace, undefined on input */
85     Entry *X,       /* size n double's, zero on output */
86     Int *Work,      /* size 5n Int's */
87 
88     /* inputs, not modified on output */
89     Int k1,             /* the block of A is from k1 to k2-1 */
90     Int PSinv [ ],      /* inverse of P from symbolic factorization */
91     double Rs [ ],      /* scale factors for A */
92 
93     /* inputs, modified on output */
94     Int Offp [ ],   /* off-diagonal matrix (modified by this routine) */
95     Int Offi [ ],
96     Entry Offx [ ],
97     /* --------------- */
98     KLU_common *Common
99 )
100 {
101     double maxlnz, dunits ;
102     Unit *LU ;
103     Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
104     Int lsize, usize, anz, ok ;
105     size_t lusize ;
106     ASSERT (Common != NULL) ;
107 
108     /* ---------------------------------------------------------------------- */
109     /* get control parameters, or use defaults */
110     /* ---------------------------------------------------------------------- */
111 
112     n = MAX (1, n) ;
113     anz = Ap [n+k1] - Ap [k1] ;
114 
115     if (Lsize <= 0)
116     {
117         Lsize = -Lsize ;
118         Lsize = MAX (Lsize, 1.0) ;
119         lsize = Lsize * anz + n ;
120     }
121     else
122     {
123         lsize = Lsize ;
124     }
125 
126     usize = lsize ;
127 
128     lsize  = MAX (n+1, lsize) ;
129     usize  = MAX (n+1, usize) ;
130 
131     maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
132     maxlnz = MIN (maxlnz, ((double) Int_MAX)) ;
133     lsize  = MIN (maxlnz, lsize) ;
134     usize  = MIN (maxlnz, usize) ;
135 
136     PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
137         n, anz, k1, lsize, usize, maxlnz)) ;
138 
139     /* ---------------------------------------------------------------------- */
140     /* allocate workspace and outputs */
141     /* ---------------------------------------------------------------------- */
142 
143     /* return arguments are not yet assigned */
144     *p_LU = (Unit *) NULL ;
145 
146     /* these computations are safe from size_t overflow */
147     W = Work ;
148     Pinv = (Int *) W ;      W += n ;
149     Stack = (Int *) W ;     W += n ;
150     Flag = (Int *) W ;      W += n ;
151     Lpend = (Int *) W ;     W += n ;
152     Ap_pos = (Int *) W ;    W += n ;
153 
154     dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
155              DUNITS (Int, usize) + DUNITS (Entry, usize) ;
156     lusize = (size_t) dunits ;
157     ok = !INT_OVERFLOW (dunits) ;
158     LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
159     if (LU == NULL)
160     {
161         /* out of memory, or problem too large */
162         Common->status = KLU_OUT_OF_MEMORY ;
163         lusize = 0 ;
164         return (lusize) ;
165     }
166 
167     /* ---------------------------------------------------------------------- */
168     /* factorize */
169     /* ---------------------------------------------------------------------- */
170 
171     /* with pruning, and non-recursive depth-first-search */
172     lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
173             Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
174             X, Stack, Flag, Ap_pos, Lpend,
175             k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
176 
177     /* ---------------------------------------------------------------------- */
178     /* return LU factors, or return nothing if an error occurred */
179     /* ---------------------------------------------------------------------- */
180 
181     if (Common->status < KLU_OK)
182     {
183         LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
184         lusize = 0 ;
185     }
186     *p_LU = LU ;
187     PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
188     return (lusize) ;
189 }
190 
191 
192 /* ========================================================================== */
193 /* === KLU_lsolve =========================================================== */
194 /* ========================================================================== */
195 
196 /* Solve Lx=b.  Assumes L is unit lower triangular and where the unit diagonal
197  * entry is NOT stored.  Overwrites B  with the solution X.  B is n-by-nrhs
198  * and is stored in ROW form with row dimension nrhs.  nrhs must be in the
199  * range 1 to 4. */
200 
KLU_lsolve(Int n,Int Lip[],Int Llen[],Unit LU[],Int nrhs,Entry X[])201 void KLU_lsolve
202 (
203     /* inputs, not modified: */
204     Int n,
205     Int Lip [ ],
206     Int Llen [ ],
207     Unit LU [ ],
208     Int nrhs,
209     /* right-hand-side on input, solution to Lx=b on output */
210     Entry X [ ]
211 )
212 {
213     Entry x [4], lik ;
214     Int *Li ;
215     Entry *Lx ;
216     Int k, p, len, i ;
217 
218     switch (nrhs)
219     {
220 
221         case 1:
222             for (k = 0 ; k < n ; k++)
223             {
224                 x [0] = X [k] ;
225                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
226                 /* unit diagonal of L is not stored*/
227                 for (p = 0 ; p < len ; p++)
228                 {
229                     /* X [Li [p]] -= Lx [p] * x [0] ; */
230                     MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
231                 }
232             }
233             break ;
234 
235         case 2:
236 
237             for (k = 0 ; k < n ; k++)
238             {
239                 x [0] = X [2*k    ] ;
240                 x [1] = X [2*k + 1] ;
241                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
242                 for (p = 0 ; p < len ; p++)
243                 {
244                     i = Li [p] ;
245                     lik = Lx [p] ;
246                     MULT_SUB (X [2*i], lik, x [0]) ;
247                     MULT_SUB (X [2*i + 1], lik, x [1]) ;
248                 }
249             }
250             break ;
251 
252         case 3:
253 
254             for (k = 0 ; k < n ; k++)
255             {
256                 x [0] = X [3*k    ] ;
257                 x [1] = X [3*k + 1] ;
258                 x [2] = X [3*k + 2] ;
259                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
260                 for (p = 0 ; p < len ; p++)
261                 {
262                     i = Li [p] ;
263                     lik = Lx [p] ;
264                     MULT_SUB (X [3*i], lik, x [0]) ;
265                     MULT_SUB (X [3*i + 1], lik, x [1]) ;
266                     MULT_SUB (X [3*i + 2], lik, x [2]) ;
267                 }
268             }
269             break ;
270 
271         case 4:
272 
273             for (k = 0 ; k < n ; k++)
274             {
275                 x [0] = X [4*k    ] ;
276                 x [1] = X [4*k + 1] ;
277                 x [2] = X [4*k + 2] ;
278                 x [3] = X [4*k + 3] ;
279                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280                 for (p = 0 ; p < len ; p++)
281                 {
282                     i = Li [p] ;
283                     lik = Lx [p] ;
284                     MULT_SUB (X [4*i], lik, x [0]) ;
285                     MULT_SUB (X [4*i + 1], lik, x [1]) ;
286                     MULT_SUB (X [4*i + 2], lik, x [2]) ;
287                     MULT_SUB (X [4*i + 3], lik, x [3]) ;
288                 }
289             }
290             break ;
291 
292     }
293 }
294 
295 /* ========================================================================== */
296 /* === KLU_usolve =========================================================== */
297 /* ========================================================================== */
298 
299 /* Solve Ux=b.  Assumes U is non-unit upper triangular and where the diagonal
300  * entry is NOT stored.  Overwrites B with the solution X.  B is n-by-nrhs
301  * and is stored in ROW form with row dimension nrhs.  nrhs must be in the
302  * range 1 to 4. */
303 
KLU_usolve(Int n,Int Uip[],Int Ulen[],Unit LU[],Entry Udiag[],Int nrhs,Entry X[])304 void KLU_usolve
305 (
306     /* inputs, not modified: */
307     Int n,
308     Int Uip [ ],
309     Int Ulen [ ],
310     Unit LU [ ],
311     Entry Udiag [ ],
312     Int nrhs,
313     /* right-hand-side on input, solution to Ux=b on output */
314     Entry X [ ]
315 )
316 {
317     Entry x [4], uik, ukk ;
318     Int *Ui ;
319     Entry *Ux ;
320     Int k, p, len, i ;
321 
322     switch (nrhs)
323     {
324 
325         case 1:
326 
327             for (k = n-1 ; k >= 0 ; k--)
328             {
329                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
330                 /* x [0] = X [k] / Udiag [k] ; */
331                 DIV (x [0], X [k], Udiag [k]) ;
332                 X [k] = x [0] ;
333                 for (p = 0 ; p < len ; p++)
334                 {
335                     /* X [Ui [p]] -= Ux [p] * x [0] ; */
336                     MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
337 
338                 }
339             }
340 
341             break ;
342 
343         case 2:
344 
345             for (k = n-1 ; k >= 0 ; k--)
346             {
347                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
348                 ukk = Udiag [k] ;
349                 /* x [0] = X [2*k    ] / ukk ;
350                 x [1] = X [2*k + 1] / ukk ; */
351                 DIV (x [0], X [2*k], ukk) ;
352                 DIV (x [1], X [2*k + 1], ukk) ;
353 
354                 X [2*k    ] = x [0] ;
355                 X [2*k + 1] = x [1] ;
356                 for (p = 0 ; p < len ; p++)
357                 {
358                     i = Ui [p] ;
359                     uik = Ux [p] ;
360                     /* X [2*i    ] -= uik * x [0] ;
361                     X [2*i + 1] -= uik * x [1] ; */
362                     MULT_SUB (X [2*i], uik, x [0]) ;
363                     MULT_SUB (X [2*i + 1], uik, x [1]) ;
364                 }
365             }
366 
367             break ;
368 
369         case 3:
370 
371             for (k = n-1 ; k >= 0 ; k--)
372             {
373                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
374                 ukk = Udiag [k] ;
375 
376                 DIV (x [0], X [3*k], ukk) ;
377                 DIV (x [1], X [3*k + 1], ukk) ;
378                 DIV (x [2], X [3*k + 2], ukk) ;
379 
380                 X [3*k    ] = x [0] ;
381                 X [3*k + 1] = x [1] ;
382                 X [3*k + 2] = x [2] ;
383                 for (p = 0 ; p < len ; p++)
384                 {
385                     i = Ui [p] ;
386                     uik = Ux [p] ;
387                     MULT_SUB (X [3*i], uik, x [0]) ;
388                     MULT_SUB (X [3*i + 1], uik, x [1]) ;
389                     MULT_SUB (X [3*i + 2], uik, x [2]) ;
390                 }
391             }
392 
393             break ;
394 
395         case 4:
396 
397             for (k = n-1 ; k >= 0 ; k--)
398             {
399                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
400                 ukk = Udiag [k] ;
401 
402                 DIV (x [0], X [4*k], ukk) ;
403                 DIV (x [1], X [4*k + 1], ukk) ;
404                 DIV (x [2], X [4*k + 2], ukk) ;
405                 DIV (x [3], X [4*k + 3], ukk) ;
406 
407                 X [4*k    ] = x [0] ;
408                 X [4*k + 1] = x [1] ;
409                 X [4*k + 2] = x [2] ;
410                 X [4*k + 3] = x [3] ;
411                 for (p = 0 ; p < len ; p++)
412                 {
413                     i = Ui [p] ;
414                     uik = Ux [p] ;
415 
416                     MULT_SUB (X [4*i], uik, x [0]) ;
417                     MULT_SUB (X [4*i + 1], uik, x [1]) ;
418                     MULT_SUB (X [4*i + 2], uik, x [2]) ;
419                     MULT_SUB (X [4*i + 3], uik, x [3]) ;
420                 }
421             }
422 
423             break ;
424 
425     }
426 }
427 
428 
429 /* ========================================================================== */
430 /* === KLU_ltsolve ========================================================== */
431 /* ========================================================================== */
432 
433 /* Solve L'x=b.  Assumes L is unit lower triangular and where the unit diagonal
434  * entry is NOT stored.  Overwrites B with the solution X.  B is n-by-nrhs
435  * and is stored in ROW form with row dimension nrhs.  nrhs must in the
436  * range 1 to 4. */
437 
KLU_ltsolve(Int n,Int Lip[],Int Llen[],Unit LU[],Int nrhs,Int conj_solve,Entry X[])438 void KLU_ltsolve
439 (
440     /* inputs, not modified: */
441     Int n,
442     Int Lip [ ],
443     Int Llen [ ],
444     Unit LU [ ],
445     Int nrhs,
446 #ifdef COMPLEX
447     Int conj_solve,
448 #endif
449     /* right-hand-side on input, solution to L'x=b on output */
450     Entry X [ ]
451 )
452 {
453     Entry x [4], lik ;
454     Int *Li ;
455     Entry *Lx ;
456     Int k, p, len, i ;
457 
458     switch (nrhs)
459     {
460 
461         case 1:
462 
463             for (k = n-1 ; k >= 0 ; k--)
464             {
465                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
466                 x [0] = X [k] ;
467                 for (p = 0 ; p < len ; p++)
468                 {
469 #ifdef COMPLEX
470                     if (conj_solve)
471                     {
472                         /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
473                         MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
474                     }
475                     else
476 #endif
477                     {
478                         /*x [0] -= Lx [p] * X [Li [p]] ;*/
479                         MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
480                     }
481                 }
482                 X [k] = x [0] ;
483             }
484             break ;
485 
486         case 2:
487 
488             for (k = n-1 ; k >= 0 ; k--)
489             {
490                 x [0] = X [2*k    ] ;
491                 x [1] = X [2*k + 1] ;
492                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
493                 for (p = 0 ; p < len ; p++)
494                 {
495                     i = Li [p] ;
496 #ifdef COMPLEX
497                     if (conj_solve)
498                     {
499                         CONJ (lik, Lx [p]) ;
500                     }
501                     else
502 #endif
503                     {
504                         lik = Lx [p] ;
505                     }
506                     MULT_SUB (x [0], lik, X [2*i]) ;
507                     MULT_SUB (x [1], lik, X [2*i + 1]) ;
508                 }
509                 X [2*k    ] = x [0] ;
510                 X [2*k + 1] = x [1] ;
511             }
512             break ;
513 
514         case 3:
515 
516             for (k = n-1 ; k >= 0 ; k--)
517             {
518                 x [0] = X [3*k    ] ;
519                 x [1] = X [3*k + 1] ;
520                 x [2] = X [3*k + 2] ;
521                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
522                 for (p = 0 ; p < len ; p++)
523                 {
524                     i = Li [p] ;
525 #ifdef COMPLEX
526                     if (conj_solve)
527                     {
528                         CONJ (lik, Lx [p]) ;
529                     }
530                     else
531 #endif
532                     {
533                         lik = Lx [p] ;
534                     }
535                     MULT_SUB (x [0], lik, X [3*i]) ;
536                     MULT_SUB (x [1], lik, X [3*i + 1]) ;
537                     MULT_SUB (x [2], lik, X [3*i + 2]) ;
538                 }
539                 X [3*k    ] = x [0] ;
540                 X [3*k + 1] = x [1] ;
541                 X [3*k + 2] = x [2] ;
542             }
543             break ;
544 
545         case 4:
546 
547             for (k = n-1 ; k >= 0 ; k--)
548             {
549                 x [0] = X [4*k    ] ;
550                 x [1] = X [4*k + 1] ;
551                 x [2] = X [4*k + 2] ;
552                 x [3] = X [4*k + 3] ;
553                 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
554                 for (p = 0 ; p < len ; p++)
555                 {
556                     i = Li [p] ;
557 #ifdef COMPLEX
558                     if (conj_solve)
559                     {
560                         CONJ (lik, Lx [p]) ;
561                     }
562                     else
563 #endif
564                     {
565                         lik = Lx [p] ;
566                     }
567                     MULT_SUB (x [0], lik, X [4*i]) ;
568                     MULT_SUB (x [1], lik, X [4*i + 1]) ;
569                     MULT_SUB (x [2], lik, X [4*i + 2]) ;
570                     MULT_SUB (x [3], lik, X [4*i + 3]) ;
571                 }
572                 X [4*k    ] = x [0] ;
573                 X [4*k + 1] = x [1] ;
574                 X [4*k + 2] = x [2] ;
575                 X [4*k + 3] = x [3] ;
576             }
577             break ;
578     }
579 }
580 
581 
582 /* ========================================================================== */
583 /* === KLU_utsolve ========================================================== */
584 /* ========================================================================== */
585 
586 /* Solve U'x=b.  Assumes U is non-unit upper triangular and where the diagonal
587  * entry is stored (and appears last in each column of U).  Overwrites B
588  * with the solution X.  B is n-by-nrhs and is stored in ROW form with row
589  * dimension nrhs.  nrhs must be in the range 1 to 4. */
590 
KLU_utsolve(Int n,Int Uip[],Int Ulen[],Unit LU[],Entry Udiag[],Int nrhs,Int conj_solve,Entry X[])591 void KLU_utsolve
592 (
593     /* inputs, not modified: */
594     Int n,
595     Int Uip [ ],
596     Int Ulen [ ],
597     Unit LU [ ],
598     Entry Udiag [ ],
599     Int nrhs,
600 #ifdef COMPLEX
601     Int conj_solve,
602 #endif
603     /* right-hand-side on input, solution to Ux=b on output */
604     Entry X [ ]
605 )
606 {
607     Entry x [4], uik, ukk ;
608     Int k, p, len, i ;
609     Int *Ui ;
610     Entry *Ux ;
611 
612     switch (nrhs)
613     {
614 
615         case 1:
616 
617             for (k = 0 ; k < n ; k++)
618             {
619                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
620                 x [0] = X [k] ;
621                 for (p = 0 ; p < len ; p++)
622                 {
623 #ifdef COMPLEX
624                     if (conj_solve)
625                     {
626                         /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
627                         MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
628                     }
629                     else
630 #endif
631                     {
632                         /* x [0] -= Ux [p] * X [Ui [p]] ; */
633                         MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
634                     }
635                 }
636 #ifdef COMPLEX
637                 if (conj_solve)
638                 {
639                     CONJ (ukk, Udiag [k]) ;
640                 }
641                 else
642 #endif
643                 {
644                     ukk = Udiag [k] ;
645                 }
646                 DIV (X [k], x [0], ukk) ;
647             }
648             break ;
649 
650         case 2:
651 
652             for (k = 0 ; k < n ; k++)
653             {
654                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
655                 x [0] = X [2*k    ] ;
656                 x [1] = X [2*k + 1] ;
657                 for (p = 0 ; p < len ; p++)
658                 {
659                     i = Ui [p] ;
660 #ifdef COMPLEX
661                     if (conj_solve)
662                     {
663                         CONJ (uik, Ux [p]) ;
664                     }
665                     else
666 #endif
667                     {
668                         uik = Ux [p] ;
669                     }
670                     MULT_SUB (x [0], uik, X [2*i]) ;
671                     MULT_SUB (x [1], uik, X [2*i + 1]) ;
672                 }
673 #ifdef COMPLEX
674                 if (conj_solve)
675                 {
676                     CONJ (ukk, Udiag [k]) ;
677                 }
678                 else
679 #endif
680                 {
681                     ukk = Udiag [k] ;
682                 }
683                 DIV (X [2*k], x [0], ukk) ;
684                 DIV (X [2*k + 1], x [1], ukk) ;
685             }
686             break ;
687 
688         case 3:
689 
690             for (k = 0 ; k < n ; k++)
691             {
692                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
693                 x [0] = X [3*k    ] ;
694                 x [1] = X [3*k + 1] ;
695                 x [2] = X [3*k + 2] ;
696                 for (p = 0 ; p < len ; p++)
697                 {
698                     i = Ui [p] ;
699 #ifdef COMPLEX
700                     if (conj_solve)
701                     {
702                         CONJ (uik, Ux [p]) ;
703                     }
704                     else
705 #endif
706                     {
707                         uik = Ux [p] ;
708                     }
709                     MULT_SUB (x [0], uik, X [3*i]) ;
710                     MULT_SUB (x [1], uik, X [3*i + 1]) ;
711                     MULT_SUB (x [2], uik, X [3*i + 2]) ;
712                 }
713 #ifdef COMPLEX
714                 if (conj_solve)
715                 {
716                     CONJ (ukk, Udiag [k]) ;
717                 }
718                 else
719 #endif
720                 {
721                     ukk = Udiag [k] ;
722                 }
723                 DIV (X [3*k], x [0], ukk) ;
724                 DIV (X [3*k + 1], x [1], ukk) ;
725                 DIV (X [3*k + 2], x [2], ukk) ;
726             }
727             break ;
728 
729         case 4:
730 
731             for (k = 0 ; k < n ; k++)
732             {
733                 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
734                 x [0] = X [4*k    ] ;
735                 x [1] = X [4*k + 1] ;
736                 x [2] = X [4*k + 2] ;
737                 x [3] = X [4*k + 3] ;
738                 for (p = 0 ; p < len ; p++)
739                 {
740                     i = Ui [p] ;
741 #ifdef COMPLEX
742                     if (conj_solve)
743                     {
744                         CONJ (uik, Ux [p]) ;
745                     }
746                     else
747 #endif
748                     {
749                         uik = Ux [p] ;
750                     }
751                     MULT_SUB (x [0], uik, X [4*i]) ;
752                     MULT_SUB (x [1], uik, X [4*i + 1]) ;
753                     MULT_SUB (x [2], uik, X [4*i + 2]) ;
754                     MULT_SUB (x [3], uik, X [4*i + 3]) ;
755                 }
756 #ifdef COMPLEX
757                 if (conj_solve)
758                 {
759                     CONJ (ukk, Udiag [k]) ;
760                 }
761                 else
762 #endif
763                 {
764                     ukk = Udiag [k] ;
765                 }
766                 DIV (X [4*k], x [0], ukk) ;
767                 DIV (X [4*k + 1], x [1], ukk) ;
768                 DIV (X [4*k + 2], x [2], ukk) ;
769                 DIV (X [4*k + 3], x [3], ukk) ;
770             }
771             break ;
772     }
773 }
774