1 /* ========================================================================== */
2 /* === klu ================================================================== */
3 /* ========================================================================== */
4
5 /* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
6 * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
7 * Davis. This algorithm is what appears as the default sparse LU routine in
8 * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
9 * Note that no column ordering is provided (see COLAMD or AMD for suitable
10 * orderings). SuperLU is based on this algorithm, except that it adds the
11 * use of dense matrix operations on "supernodes" (adjacent columns with
12 * identical). This code doesn't use supernodes, thus its name ("Kent" LU,
13 * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
14 * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
15 * factors (such as for most finite-element problems). However, for matrices
16 * with very sparse LU factors, this algorithm is typically faster than both
17 * SuperLU and UMFPACK, since in this case there is little chance to exploit
18 * dense matrix kernels (the BLAS).
19 *
20 * Only one block of A is factorized, in the BTF form. The input n is the
21 * size of the block; k1 is the first row and column in the block.
22 *
23 * NOTE: no error checking is done on the inputs. This version is not meant to
24 * be called directly by the user. Use klu_factor instead.
25 *
26 * No fill-reducing ordering is provided. The ordering quality of
27 * klu_kernel_factor is the responsibility of the caller. The input A must
28 * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
29 * be provided.
30 *
31 * The input matrix A must be in compressed-column form, with either sorted
32 * or unsorted row indices. Row indices for column j of A is in
33 * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
34 * numerical values. No duplicate entries are allowed.
35 *
36 * Copyright 2004-2009, Tim Davis. All rights reserved. See the README
37 * file for details on permitted use. Note that no code from The MathWorks,
38 * Inc, or from SuperLU, or from any other source appears here. The code is
39 * written from scratch, from the algorithmic description in Gilbert & Peierls'
40 * and Eisenstat & Liu's journal papers [1,2].
41 *
42 * If an input permutation Q is provided, the factorization L*U = A (P,Q)
43 * is computed, where P is determined by partial pivoting, and Q is the input
44 * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
45 * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
46 * input permutation is applied symmetrically to the input matrix. The output
47 * permutation P includes both the partial pivoting ordering and the input
48 * permutation. If Q is NULL, then it is assumed to be the identity
49 * permutation. Q is not modified.
50 *
51 * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
52 * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
53 * vol 9, pp. 862-874, 1988.
54 * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
55 * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
56 * Applic., vol 13, pp. 202-211, 1992.
57 */
58
59 /* ========================================================================== */
60
61 #include "klu_internal.h"
62
KLU_kernel_factor(Int n,Int Ap[],Int Ai[],Entry Ax[],Int Q[],double Lsize,Unit ** p_LU,Entry Udiag[],Int Llen[],Int Ulen[],Int Lip[],Int Uip[],Int P[],Int * lnz,Int * unz,Entry * X,Int * Work,Int k1,Int PSinv[],double Rs[],Int Offp[],Int Offi[],Entry Offx[],KLU_common * Common)63 size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
64 (
65 /* inputs, not modified */
66 Int n, /* A is n-by-n. n must be > 0. */
67 Int Ap [ ], /* size n+1, column pointers for A */
68 Int Ai [ ], /* size nz = Ap [n], row indices for A */
69 Entry Ax [ ], /* size nz, values of A */
70 Int Q [ ], /* size n, optional column permutation */
71 double Lsize, /* estimate of number of nonzeros in L */
72
73 /* outputs, not defined on input */
74 Unit **p_LU, /* row indices and values of L and U */
75 Entry Udiag [ ], /* size n, diagonal of U */
76 Int Llen [ ], /* size n, column length of L */
77 Int Ulen [ ], /* size n, column length of U */
78 Int Lip [ ], /* size n, column pointers for L */
79 Int Uip [ ], /* size n, column pointers for U */
80 Int P [ ], /* row permutation, size n */
81 Int *lnz, /* size of L */
82 Int *unz, /* size of U */
83
84 /* workspace, undefined on input */
85 Entry *X, /* size n double's, zero on output */
86 Int *Work, /* size 5n Int's */
87
88 /* inputs, not modified on output */
89 Int k1, /* the block of A is from k1 to k2-1 */
90 Int PSinv [ ], /* inverse of P from symbolic factorization */
91 double Rs [ ], /* scale factors for A */
92
93 /* inputs, modified on output */
94 Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
95 Int Offi [ ],
96 Entry Offx [ ],
97 /* --------------- */
98 KLU_common *Common
99 )
100 {
101 double maxlnz, dunits ;
102 Unit *LU ;
103 Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
104 Int lsize, usize, anz, ok ;
105 size_t lusize ;
106 ASSERT (Common != NULL) ;
107
108 /* ---------------------------------------------------------------------- */
109 /* get control parameters, or use defaults */
110 /* ---------------------------------------------------------------------- */
111
112 n = MAX (1, n) ;
113 anz = Ap [n+k1] - Ap [k1] ;
114
115 if (Lsize <= 0)
116 {
117 Lsize = -Lsize ;
118 Lsize = MAX (Lsize, 1.0) ;
119 lsize = Lsize * anz + n ;
120 }
121 else
122 {
123 lsize = Lsize ;
124 }
125
126 usize = lsize ;
127
128 lsize = MAX (n+1, lsize) ;
129 usize = MAX (n+1, usize) ;
130
131 maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
132 maxlnz = MIN (maxlnz, ((double) Int_MAX)) ;
133 lsize = MIN (maxlnz, lsize) ;
134 usize = MIN (maxlnz, usize) ;
135
136 PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
137 n, anz, k1, lsize, usize, maxlnz)) ;
138
139 /* ---------------------------------------------------------------------- */
140 /* allocate workspace and outputs */
141 /* ---------------------------------------------------------------------- */
142
143 /* return arguments are not yet assigned */
144 *p_LU = (Unit *) NULL ;
145
146 /* these computations are safe from size_t overflow */
147 W = Work ;
148 Pinv = (Int *) W ; W += n ;
149 Stack = (Int *) W ; W += n ;
150 Flag = (Int *) W ; W += n ;
151 Lpend = (Int *) W ; W += n ;
152 Ap_pos = (Int *) W ; W += n ;
153
154 dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
155 DUNITS (Int, usize) + DUNITS (Entry, usize) ;
156 lusize = (size_t) dunits ;
157 ok = !INT_OVERFLOW (dunits) ;
158 LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
159 if (LU == NULL)
160 {
161 /* out of memory, or problem too large */
162 Common->status = KLU_OUT_OF_MEMORY ;
163 lusize = 0 ;
164 return (lusize) ;
165 }
166
167 /* ---------------------------------------------------------------------- */
168 /* factorize */
169 /* ---------------------------------------------------------------------- */
170
171 /* with pruning, and non-recursive depth-first-search */
172 lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
173 Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
174 X, Stack, Flag, Ap_pos, Lpend,
175 k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
176
177 /* ---------------------------------------------------------------------- */
178 /* return LU factors, or return nothing if an error occurred */
179 /* ---------------------------------------------------------------------- */
180
181 if (Common->status < KLU_OK)
182 {
183 LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
184 lusize = 0 ;
185 }
186 *p_LU = LU ;
187 PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
188 return (lusize) ;
189 }
190
191
192 /* ========================================================================== */
193 /* === KLU_lsolve =========================================================== */
194 /* ========================================================================== */
195
196 /* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
197 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
198 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
199 * range 1 to 4. */
200
KLU_lsolve(Int n,Int Lip[],Int Llen[],Unit LU[],Int nrhs,Entry X[])201 void KLU_lsolve
202 (
203 /* inputs, not modified: */
204 Int n,
205 Int Lip [ ],
206 Int Llen [ ],
207 Unit LU [ ],
208 Int nrhs,
209 /* right-hand-side on input, solution to Lx=b on output */
210 Entry X [ ]
211 )
212 {
213 Entry x [4], lik ;
214 Int *Li ;
215 Entry *Lx ;
216 Int k, p, len, i ;
217
218 switch (nrhs)
219 {
220
221 case 1:
222 for (k = 0 ; k < n ; k++)
223 {
224 x [0] = X [k] ;
225 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
226 /* unit diagonal of L is not stored*/
227 for (p = 0 ; p < len ; p++)
228 {
229 /* X [Li [p]] -= Lx [p] * x [0] ; */
230 MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
231 }
232 }
233 break ;
234
235 case 2:
236
237 for (k = 0 ; k < n ; k++)
238 {
239 x [0] = X [2*k ] ;
240 x [1] = X [2*k + 1] ;
241 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
242 for (p = 0 ; p < len ; p++)
243 {
244 i = Li [p] ;
245 lik = Lx [p] ;
246 MULT_SUB (X [2*i], lik, x [0]) ;
247 MULT_SUB (X [2*i + 1], lik, x [1]) ;
248 }
249 }
250 break ;
251
252 case 3:
253
254 for (k = 0 ; k < n ; k++)
255 {
256 x [0] = X [3*k ] ;
257 x [1] = X [3*k + 1] ;
258 x [2] = X [3*k + 2] ;
259 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
260 for (p = 0 ; p < len ; p++)
261 {
262 i = Li [p] ;
263 lik = Lx [p] ;
264 MULT_SUB (X [3*i], lik, x [0]) ;
265 MULT_SUB (X [3*i + 1], lik, x [1]) ;
266 MULT_SUB (X [3*i + 2], lik, x [2]) ;
267 }
268 }
269 break ;
270
271 case 4:
272
273 for (k = 0 ; k < n ; k++)
274 {
275 x [0] = X [4*k ] ;
276 x [1] = X [4*k + 1] ;
277 x [2] = X [4*k + 2] ;
278 x [3] = X [4*k + 3] ;
279 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
280 for (p = 0 ; p < len ; p++)
281 {
282 i = Li [p] ;
283 lik = Lx [p] ;
284 MULT_SUB (X [4*i], lik, x [0]) ;
285 MULT_SUB (X [4*i + 1], lik, x [1]) ;
286 MULT_SUB (X [4*i + 2], lik, x [2]) ;
287 MULT_SUB (X [4*i + 3], lik, x [3]) ;
288 }
289 }
290 break ;
291
292 }
293 }
294
295 /* ========================================================================== */
296 /* === KLU_usolve =========================================================== */
297 /* ========================================================================== */
298
299 /* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
300 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
301 * and is stored in ROW form with row dimension nrhs. nrhs must be in the
302 * range 1 to 4. */
303
KLU_usolve(Int n,Int Uip[],Int Ulen[],Unit LU[],Entry Udiag[],Int nrhs,Entry X[])304 void KLU_usolve
305 (
306 /* inputs, not modified: */
307 Int n,
308 Int Uip [ ],
309 Int Ulen [ ],
310 Unit LU [ ],
311 Entry Udiag [ ],
312 Int nrhs,
313 /* right-hand-side on input, solution to Ux=b on output */
314 Entry X [ ]
315 )
316 {
317 Entry x [4], uik, ukk ;
318 Int *Ui ;
319 Entry *Ux ;
320 Int k, p, len, i ;
321
322 switch (nrhs)
323 {
324
325 case 1:
326
327 for (k = n-1 ; k >= 0 ; k--)
328 {
329 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
330 /* x [0] = X [k] / Udiag [k] ; */
331 DIV (x [0], X [k], Udiag [k]) ;
332 X [k] = x [0] ;
333 for (p = 0 ; p < len ; p++)
334 {
335 /* X [Ui [p]] -= Ux [p] * x [0] ; */
336 MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
337
338 }
339 }
340
341 break ;
342
343 case 2:
344
345 for (k = n-1 ; k >= 0 ; k--)
346 {
347 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
348 ukk = Udiag [k] ;
349 /* x [0] = X [2*k ] / ukk ;
350 x [1] = X [2*k + 1] / ukk ; */
351 DIV (x [0], X [2*k], ukk) ;
352 DIV (x [1], X [2*k + 1], ukk) ;
353
354 X [2*k ] = x [0] ;
355 X [2*k + 1] = x [1] ;
356 for (p = 0 ; p < len ; p++)
357 {
358 i = Ui [p] ;
359 uik = Ux [p] ;
360 /* X [2*i ] -= uik * x [0] ;
361 X [2*i + 1] -= uik * x [1] ; */
362 MULT_SUB (X [2*i], uik, x [0]) ;
363 MULT_SUB (X [2*i + 1], uik, x [1]) ;
364 }
365 }
366
367 break ;
368
369 case 3:
370
371 for (k = n-1 ; k >= 0 ; k--)
372 {
373 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
374 ukk = Udiag [k] ;
375
376 DIV (x [0], X [3*k], ukk) ;
377 DIV (x [1], X [3*k + 1], ukk) ;
378 DIV (x [2], X [3*k + 2], ukk) ;
379
380 X [3*k ] = x [0] ;
381 X [3*k + 1] = x [1] ;
382 X [3*k + 2] = x [2] ;
383 for (p = 0 ; p < len ; p++)
384 {
385 i = Ui [p] ;
386 uik = Ux [p] ;
387 MULT_SUB (X [3*i], uik, x [0]) ;
388 MULT_SUB (X [3*i + 1], uik, x [1]) ;
389 MULT_SUB (X [3*i + 2], uik, x [2]) ;
390 }
391 }
392
393 break ;
394
395 case 4:
396
397 for (k = n-1 ; k >= 0 ; k--)
398 {
399 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
400 ukk = Udiag [k] ;
401
402 DIV (x [0], X [4*k], ukk) ;
403 DIV (x [1], X [4*k + 1], ukk) ;
404 DIV (x [2], X [4*k + 2], ukk) ;
405 DIV (x [3], X [4*k + 3], ukk) ;
406
407 X [4*k ] = x [0] ;
408 X [4*k + 1] = x [1] ;
409 X [4*k + 2] = x [2] ;
410 X [4*k + 3] = x [3] ;
411 for (p = 0 ; p < len ; p++)
412 {
413 i = Ui [p] ;
414 uik = Ux [p] ;
415
416 MULT_SUB (X [4*i], uik, x [0]) ;
417 MULT_SUB (X [4*i + 1], uik, x [1]) ;
418 MULT_SUB (X [4*i + 2], uik, x [2]) ;
419 MULT_SUB (X [4*i + 3], uik, x [3]) ;
420 }
421 }
422
423 break ;
424
425 }
426 }
427
428
429 /* ========================================================================== */
430 /* === KLU_ltsolve ========================================================== */
431 /* ========================================================================== */
432
433 /* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
434 * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
435 * and is stored in ROW form with row dimension nrhs. nrhs must in the
436 * range 1 to 4. */
437
KLU_ltsolve(Int n,Int Lip[],Int Llen[],Unit LU[],Int nrhs,Int conj_solve,Entry X[])438 void KLU_ltsolve
439 (
440 /* inputs, not modified: */
441 Int n,
442 Int Lip [ ],
443 Int Llen [ ],
444 Unit LU [ ],
445 Int nrhs,
446 #ifdef COMPLEX
447 Int conj_solve,
448 #endif
449 /* right-hand-side on input, solution to L'x=b on output */
450 Entry X [ ]
451 )
452 {
453 Entry x [4], lik ;
454 Int *Li ;
455 Entry *Lx ;
456 Int k, p, len, i ;
457
458 switch (nrhs)
459 {
460
461 case 1:
462
463 for (k = n-1 ; k >= 0 ; k--)
464 {
465 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
466 x [0] = X [k] ;
467 for (p = 0 ; p < len ; p++)
468 {
469 #ifdef COMPLEX
470 if (conj_solve)
471 {
472 /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
473 MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
474 }
475 else
476 #endif
477 {
478 /*x [0] -= Lx [p] * X [Li [p]] ;*/
479 MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
480 }
481 }
482 X [k] = x [0] ;
483 }
484 break ;
485
486 case 2:
487
488 for (k = n-1 ; k >= 0 ; k--)
489 {
490 x [0] = X [2*k ] ;
491 x [1] = X [2*k + 1] ;
492 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
493 for (p = 0 ; p < len ; p++)
494 {
495 i = Li [p] ;
496 #ifdef COMPLEX
497 if (conj_solve)
498 {
499 CONJ (lik, Lx [p]) ;
500 }
501 else
502 #endif
503 {
504 lik = Lx [p] ;
505 }
506 MULT_SUB (x [0], lik, X [2*i]) ;
507 MULT_SUB (x [1], lik, X [2*i + 1]) ;
508 }
509 X [2*k ] = x [0] ;
510 X [2*k + 1] = x [1] ;
511 }
512 break ;
513
514 case 3:
515
516 for (k = n-1 ; k >= 0 ; k--)
517 {
518 x [0] = X [3*k ] ;
519 x [1] = X [3*k + 1] ;
520 x [2] = X [3*k + 2] ;
521 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
522 for (p = 0 ; p < len ; p++)
523 {
524 i = Li [p] ;
525 #ifdef COMPLEX
526 if (conj_solve)
527 {
528 CONJ (lik, Lx [p]) ;
529 }
530 else
531 #endif
532 {
533 lik = Lx [p] ;
534 }
535 MULT_SUB (x [0], lik, X [3*i]) ;
536 MULT_SUB (x [1], lik, X [3*i + 1]) ;
537 MULT_SUB (x [2], lik, X [3*i + 2]) ;
538 }
539 X [3*k ] = x [0] ;
540 X [3*k + 1] = x [1] ;
541 X [3*k + 2] = x [2] ;
542 }
543 break ;
544
545 case 4:
546
547 for (k = n-1 ; k >= 0 ; k--)
548 {
549 x [0] = X [4*k ] ;
550 x [1] = X [4*k + 1] ;
551 x [2] = X [4*k + 2] ;
552 x [3] = X [4*k + 3] ;
553 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
554 for (p = 0 ; p < len ; p++)
555 {
556 i = Li [p] ;
557 #ifdef COMPLEX
558 if (conj_solve)
559 {
560 CONJ (lik, Lx [p]) ;
561 }
562 else
563 #endif
564 {
565 lik = Lx [p] ;
566 }
567 MULT_SUB (x [0], lik, X [4*i]) ;
568 MULT_SUB (x [1], lik, X [4*i + 1]) ;
569 MULT_SUB (x [2], lik, X [4*i + 2]) ;
570 MULT_SUB (x [3], lik, X [4*i + 3]) ;
571 }
572 X [4*k ] = x [0] ;
573 X [4*k + 1] = x [1] ;
574 X [4*k + 2] = x [2] ;
575 X [4*k + 3] = x [3] ;
576 }
577 break ;
578 }
579 }
580
581
582 /* ========================================================================== */
583 /* === KLU_utsolve ========================================================== */
584 /* ========================================================================== */
585
586 /* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
587 * entry is stored (and appears last in each column of U). Overwrites B
588 * with the solution X. B is n-by-nrhs and is stored in ROW form with row
589 * dimension nrhs. nrhs must be in the range 1 to 4. */
590
KLU_utsolve(Int n,Int Uip[],Int Ulen[],Unit LU[],Entry Udiag[],Int nrhs,Int conj_solve,Entry X[])591 void KLU_utsolve
592 (
593 /* inputs, not modified: */
594 Int n,
595 Int Uip [ ],
596 Int Ulen [ ],
597 Unit LU [ ],
598 Entry Udiag [ ],
599 Int nrhs,
600 #ifdef COMPLEX
601 Int conj_solve,
602 #endif
603 /* right-hand-side on input, solution to Ux=b on output */
604 Entry X [ ]
605 )
606 {
607 Entry x [4], uik, ukk ;
608 Int k, p, len, i ;
609 Int *Ui ;
610 Entry *Ux ;
611
612 switch (nrhs)
613 {
614
615 case 1:
616
617 for (k = 0 ; k < n ; k++)
618 {
619 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
620 x [0] = X [k] ;
621 for (p = 0 ; p < len ; p++)
622 {
623 #ifdef COMPLEX
624 if (conj_solve)
625 {
626 /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
627 MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
628 }
629 else
630 #endif
631 {
632 /* x [0] -= Ux [p] * X [Ui [p]] ; */
633 MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
634 }
635 }
636 #ifdef COMPLEX
637 if (conj_solve)
638 {
639 CONJ (ukk, Udiag [k]) ;
640 }
641 else
642 #endif
643 {
644 ukk = Udiag [k] ;
645 }
646 DIV (X [k], x [0], ukk) ;
647 }
648 break ;
649
650 case 2:
651
652 for (k = 0 ; k < n ; k++)
653 {
654 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
655 x [0] = X [2*k ] ;
656 x [1] = X [2*k + 1] ;
657 for (p = 0 ; p < len ; p++)
658 {
659 i = Ui [p] ;
660 #ifdef COMPLEX
661 if (conj_solve)
662 {
663 CONJ (uik, Ux [p]) ;
664 }
665 else
666 #endif
667 {
668 uik = Ux [p] ;
669 }
670 MULT_SUB (x [0], uik, X [2*i]) ;
671 MULT_SUB (x [1], uik, X [2*i + 1]) ;
672 }
673 #ifdef COMPLEX
674 if (conj_solve)
675 {
676 CONJ (ukk, Udiag [k]) ;
677 }
678 else
679 #endif
680 {
681 ukk = Udiag [k] ;
682 }
683 DIV (X [2*k], x [0], ukk) ;
684 DIV (X [2*k + 1], x [1], ukk) ;
685 }
686 break ;
687
688 case 3:
689
690 for (k = 0 ; k < n ; k++)
691 {
692 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
693 x [0] = X [3*k ] ;
694 x [1] = X [3*k + 1] ;
695 x [2] = X [3*k + 2] ;
696 for (p = 0 ; p < len ; p++)
697 {
698 i = Ui [p] ;
699 #ifdef COMPLEX
700 if (conj_solve)
701 {
702 CONJ (uik, Ux [p]) ;
703 }
704 else
705 #endif
706 {
707 uik = Ux [p] ;
708 }
709 MULT_SUB (x [0], uik, X [3*i]) ;
710 MULT_SUB (x [1], uik, X [3*i + 1]) ;
711 MULT_SUB (x [2], uik, X [3*i + 2]) ;
712 }
713 #ifdef COMPLEX
714 if (conj_solve)
715 {
716 CONJ (ukk, Udiag [k]) ;
717 }
718 else
719 #endif
720 {
721 ukk = Udiag [k] ;
722 }
723 DIV (X [3*k], x [0], ukk) ;
724 DIV (X [3*k + 1], x [1], ukk) ;
725 DIV (X [3*k + 2], x [2], ukk) ;
726 }
727 break ;
728
729 case 4:
730
731 for (k = 0 ; k < n ; k++)
732 {
733 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
734 x [0] = X [4*k ] ;
735 x [1] = X [4*k + 1] ;
736 x [2] = X [4*k + 2] ;
737 x [3] = X [4*k + 3] ;
738 for (p = 0 ; p < len ; p++)
739 {
740 i = Ui [p] ;
741 #ifdef COMPLEX
742 if (conj_solve)
743 {
744 CONJ (uik, Ux [p]) ;
745 }
746 else
747 #endif
748 {
749 uik = Ux [p] ;
750 }
751 MULT_SUB (x [0], uik, X [4*i]) ;
752 MULT_SUB (x [1], uik, X [4*i + 1]) ;
753 MULT_SUB (x [2], uik, X [4*i + 2]) ;
754 MULT_SUB (x [3], uik, X [4*i + 3]) ;
755 }
756 #ifdef COMPLEX
757 if (conj_solve)
758 {
759 CONJ (ukk, Udiag [k]) ;
760 }
761 else
762 #endif
763 {
764 ukk = Udiag [k] ;
765 }
766 DIV (X [4*k], x [0], ukk) ;
767 DIV (X [4*k + 1], x [1], ukk) ;
768 DIV (X [4*k + 2], x [2], ukk) ;
769 DIV (X [4*k + 3], x [3], ukk) ;
770 }
771 break ;
772 }
773 }
774