1 /* ========================================================================== */
2 /* === Modify/t_cholmod_updown_numkr ======================================== */
3 /* ========================================================================== */
4
5 /* -----------------------------------------------------------------------------
6 * CHOLMOD/Modify Module. Copyright (C) 2005-2006,
7 * Timothy A. Davis and William W. Hager.
8 * http://www.suitesparse.com
9 * -------------------------------------------------------------------------- */
10
11 /* Supernodal numerical update/downdate of rank K = RANK, along a single path.
12 * This routine operates on a simplicial factor, but operates on adjacent
13 * columns of L that would fit within a single supernode. "Adjacent" means
14 * along a single path in the elimination tree; they may or may not be
15 * adjacent in the matrix L.
16 *
17 * external defines: NUMERIC, WDIM, RANK.
18 *
19 * WDIM is 1, 2, 4, or 8. RANK can be 1 to WDIM.
20 *
21 * A simple method is included (#define SIMPLE). The code works, but is slow.
22 * It is meant only to illustrate what this routine is doing.
23 *
24 * A rank-K update proceeds along a single path, using single-column, dual-
25 * column, or quad-column updates of L. If a column j and the next column
26 * in the path (its parent) do not have the same nonzero pattern, a single-
27 * column update is used. If they do, but the 3rd and 4th column from j do
28 * not have the same pattern, a dual-column update is used, in which the two
29 * columns are treated as if they were a single supernode of two columns. If
30 * there are 4 columns in the path that all have the same nonzero pattern, then
31 * a quad-column update is used. All three kinds of updates can be used along
32 * a single path, in a single call to this function.
33 *
34 * Single-column update:
35 *
36 * When updating a single column of L, each iteration of the for loop,
37 * below, processes four rows of W (all columns involved) and one column
38 * of L. Suppose we have a rank-5 update, and columns 2 through 6 of W
39 * are involved. In this case, W in this routine is a pointer to column
40 * 2 of the matrix W in the caller. W (in the caller, shown as 'W') is
41 * held in row-major order, and is 8-by-n (a dense matrix storage format),
42 * but shown below in column form to match the column of L. Suppose there
43 * are 13 nonzero entries in column 27 of L, with row indices 27 (the
44 * diagonal, D), 28, 30, 31, 42, 43, 44, 50, 51, 67, 81, 83, and 84. This
45 * pattern is held in Li [Lp [27] ... Lp [27 + Lnz [27] - 1], where
46 * Lnz [27] = 13. The modification of the current column j of L is done
47 * in the following order. A dot (.) means the entry of W is not accessed.
48 *
49 * W0 points to row 27 of W, and G is a 1-by-8 temporary vector.
50 *
51 * G[0] G[4]
52 * G x x x x x . . .
53 *
54 * W0
55 * |
56 * v
57 * 27 . . x x x x x . W0 points to W (27,2)
58 *
59 *
60 * row 'W' W column j = 27
61 * | | | of L
62 * v v v |
63 * first iteration of for loop: v
64 *
65 * 28 . . 1 5 9 13 17 . x
66 * 30 . . 2 6 10 14 18 . x
67 * 31 . . 3 7 11 15 19 . x
68 * 42 . . 4 8 12 16 20 . x
69 *
70 * second iteration of for loop:
71 *
72 * 43 . . 1 5 9 13 17 . x
73 * 44 . . 2 6 10 14 18 . x
74 * 50 . . 3 7 11 15 19 . x
75 * 51 . . 4 8 12 16 20 . x
76 *
77 * third iteration of for loop:
78 *
79 * 67 . . 1 5 9 13 17 . x
80 * 81 . . 2 6 10 14 18 . x
81 * 83 . . 3 7 11 15 19 . x
82 * 84 . . 4 8 12 16 20 . x
83 *
84 * If the number of offdiagonal nonzeros in column j of L is not divisible
85 * by 4, then the switch-statement does the work for the first nz % 4 rows.
86 *
87 * Dual-column update:
88 *
89 * In this case, two columns of L that are adjacent in the path are being
90 * updated, by 1 to 8 columns of W. Suppose columns j=27 and j=28 are
91 * adjacent columns in the path (they need not be j and j+1). Two rows
92 * of G and W are used as coefficients during the update: (G0, G1) and
93 * (W0, W1).
94 *
95 * G0 x x x x x . . .
96 * G1 x x x x x . . .
97 *
98 * 27 . . x x x x x . W0 points to W (27,2)
99 * 28 . . x x x x x . W1 points to W (28,2)
100 *
101 *
102 * row 'W' W0,W1 column j = 27
103 * | | | of L
104 * v v v |
105 * | |-- column j = 28 of L
106 * v v
107 * update L (j1,j):
108 *
109 * 28 . . 1 2 3 4 5 . x - ("-" is not stored in L)
110 *
111 * cleanup iteration since length is odd:
112 *
113 * 30 . . 1 2 3 4 5 . x x
114 *
115 * then each iteration does two rows of both columns of L:
116 *
117 * 31 . . 1 3 5 7 9 . x x
118 * 42 . . 2 4 6 8 10 . x x
119 *
120 * 43 . . 1 3 5 7 9 . x x
121 * 44 . . 2 4 6 8 10 . x x
122 *
123 * 50 . . 1 3 5 7 9 . x x
124 * 51 . . 2 4 6 8 10 . x x
125 *
126 * 67 . . 1 3 5 7 9 . x x
127 * 81 . . 2 4 6 8 10 . x x
128 *
129 * 83 . . 1 3 5 7 9 . x x
130 * 84 . . 2 4 6 8 10 . x x
131 *
132 * If the number of offdiagonal nonzeros in column j of L is not even,
133 * then the cleanup iteration does the work for the first row.
134 *
135 * Quad-column update:
136 *
137 * In this case, four columns of L that are adjacent in the path are being
138 * updated, by 1 to 8 columns of W. Suppose columns j=27, 28, 30, and 31
139 * are adjacent columns in the path (they need not be j, j+1, ...). Four
140 * rows of G and W are used as coefficients during the update: (G0 through
141 * G3) and (W0 through W3). j=27, j1=28, j2=30, and j3=31.
142 *
143 * G0 x x x x x . . .
144 * G1 x x x x x . . .
145 * G3 x x x x x . . .
146 * G4 x x x x x . . .
147 *
148 * 27 . . x x x x x . W0 points to W (27,2)
149 * 28 . . x x x x x . W1 points to W (28,2)
150 * 30 . . x x x x x . W2 points to W (30,2)
151 * 31 . . x x x x x . W3 points to W (31,2)
152 *
153 *
154 * row 'W' W0,W1,.. column j = 27
155 * | | | of L
156 * v v v |
157 * | |-- column j = 28 of L
158 * | | |-- column j = 30 of L
159 * | | | |-- column j = 31 of L
160 * v v v v
161 * update L (j1,j):
162 * 28 . . 1 2 3 4 5 . x - - -
163 *
164 * update L (j2,j):
165 * 30 . . 1 2 3 4 5 . # x - - (# denotes modified)
166 *
167 * update L (j2,j1)
168 * 30 . . 1 2 3 4 5 . x # - -
169 *
170 * update L (j3,j)
171 * 31 . . 1 2 3 4 5 . # x x -
172 *
173 * update L (j3,j1)
174 * 31 . . 1 2 3 4 5 . x # x -
175 *
176 * update L (j3,j2)
177 * 31 . . 1 2 3 4 5 . x x # -
178 *
179 * cleanup iteration since length is odd:
180 * 42 . . 1 2 3 4 5 . x x x x
181 *
182 *
183 * ----- CHOLMOD v1.1.1 did the following --------------------------------------
184 * then each iteration does two rows of all four colummns of L:
185 *
186 * 43 . . 1 3 5 7 9 . x x x x
187 * 44 . . 2 4 6 8 10 . x x x x
188 *
189 * 50 . . 1 3 5 7 9 . x x x x
190 * 51 . . 2 4 6 8 10 . x x x x
191 *
192 * 67 . . 1 3 5 7 9 . x x x x
193 * 81 . . 2 4 6 8 10 . x x x x
194 *
195 * 83 . . 1 3 5 7 9 . x x x x
196 * 84 . . 2 4 6 8 10 . x x x x
197 *
198 * ----- CHOLMOD v1.2.0 does the following -------------------------------------
199 * then each iteration does one rows of all four colummns of L:
200 *
201 * 43 . . 1 2 3 4 5 . x x x x
202 * 44 . . 1 2 3 4 5 . x x x x
203 * 50 . . 1 3 5 4 5 . x x x x
204 * 51 . . 1 2 3 4 5 . x x x x
205 * 67 . . 1 3 5 4 5 . x x x x
206 * 81 . . 1 2 3 4 5 . x x x x
207 * 83 . . 1 3 5 4 5 . x x x x
208 * 84 . . 1 2 3 4 5 . x x x x
209 *
210 * This file is included in t_cholmod_updown.c, only.
211 * It is not compiled separately. It contains no user-callable routines.
212 *
213 * workspace: Xwork (WDIM*nrow)
214 */
215
216 /* ========================================================================== */
217 /* === loop unrolling macros ================================================ */
218 /* ========================================================================== */
219
220 #undef RANK1
221 #undef RANK2
222 #undef RANK3
223 #undef RANK4
224 #undef RANK5
225 #undef RANK6
226 #undef RANK7
227 #undef RANK8
228
229 #define RANK1(statement) statement
230
231 #if RANK < 2
232 #define RANK2(statement)
233 #else
234 #define RANK2(statement) statement
235 #endif
236
237 #if RANK < 3
238 #define RANK3(statement)
239 #else
240 #define RANK3(statement) statement
241 #endif
242
243 #if RANK < 4
244 #define RANK4(statement)
245 #else
246 #define RANK4(statement) statement
247 #endif
248
249 #if RANK < 5
250 #define RANK5(statement)
251 #else
252 #define RANK5(statement) statement
253 #endif
254
255 #if RANK < 6
256 #define RANK6(statement)
257 #else
258 #define RANK6(statement) statement
259 #endif
260
261 #if RANK < 7
262 #define RANK7(statement)
263 #else
264 #define RANK7(statement) statement
265 #endif
266
267 #if RANK < 8
268 #define RANK8(statement)
269 #else
270 #define RANK8(statement) statement
271 #endif
272
273 #define FOR_ALL_K \
274 RANK1 (DO (0)) \
275 RANK2 (DO (1)) \
276 RANK3 (DO (2)) \
277 RANK4 (DO (3)) \
278 RANK5 (DO (4)) \
279 RANK6 (DO (5)) \
280 RANK7 (DO (6)) \
281 RANK8 (DO (7))
282
283 /* ========================================================================== */
284 /* === alpha/gamma ========================================================== */
285 /* ========================================================================== */
286
287 #undef ALPHA_GAMMA
288
289 #define ALPHA_GAMMA(Dj,Alpha,Gamma,W) \
290 { \
291 double dj = Dj ; \
292 if (update) \
293 { \
294 for (k = 0 ; k < RANK ; k++) \
295 { \
296 double w = W [k] ; \
297 double alpha = Alpha [k] ; \
298 double a = alpha + (w * w) / dj ; \
299 dj *= a ; \
300 Alpha [k] = a ; \
301 Gamma [k] = (- w / dj) ; \
302 dj /= alpha ; \
303 } \
304 } \
305 else \
306 { \
307 for (k = 0 ; k < RANK ; k++) \
308 { \
309 double w = W [k] ; \
310 double alpha = Alpha [k] ; \
311 double a = alpha - (w * w) / dj ; \
312 dj *= a ; \
313 Alpha [k] = a ; \
314 Gamma [k] = w / dj ; \
315 dj /= alpha ; \
316 } \
317 } \
318 Dj = ((use_dbound) ? (CHOLMOD(dbound) (dj, Common)) : (dj)) ; \
319 }
320
321 /* ========================================================================== */
322 /* === numeric update/downdate along one path =============================== */
323 /* ========================================================================== */
324
NUMERIC(WDIM,RANK)325 static void NUMERIC (WDIM, RANK)
326 (
327 int update, /* TRUE for update, FALSE for downdate */
328 Int j, /* first column in the path */
329 Int e, /* last column in the path */
330 double Alpha [ ], /* alpha, for each column of W */
331 double W [ ], /* W is an n-by-WDIM array, stored in row-major order */
332 cholmod_factor *L, /* with unit diagonal (diagonal not stored) */
333 cholmod_common *Common
334 )
335 {
336
337 #ifdef SIMPLE
338 #define w(row,col) W [WDIM*(row) + (col)]
339
340 /* ---------------------------------------------------------------------- */
341 /* concise but slow version for illustration only */
342 /* ---------------------------------------------------------------------- */
343
344 double Gamma [WDIM] ;
345 double *Lx ;
346 Int *Li, *Lp, *Lnz ;
347 Int p, k ;
348 Int use_dbound = IS_GT_ZERO (Common->dbound) ;
349
350 Li = L->i ;
351 Lx = L->x ;
352 Lp = L->p ;
353 Lnz = L->nz ;
354
355 /* walk up the etree from node j to its ancestor e */
356 for ( ; j <= e ; j = (Lnz [j] > 1) ? (Li [Lp [j] + 1]) : Int_max)
357 {
358 /* update the diagonal entry D (j,j) with each column of W */
359 ALPHA_GAMMA (Lx [Lp [j]], Alpha, Gamma, (&(w (j,0)))) ;
360 /* update column j of L */
361 for (p = Lp [j] + 1 ; p < Lp [j] + Lnz [j] ; p++)
362 {
363 /* update row Li [p] of column j of L with each column of W */
364 Int i = Li [p] ;
365 for (k = 0 ; k < RANK ; k++)
366 {
367 w (i,k) -= w (j,k) * Lx [p] ;
368 Lx [p] -= Gamma [k] * w (i,k) ;
369 }
370 }
371 /* clear workspace W */
372 for (k = 0 ; k < RANK ; k++)
373 {
374 w (j,k) = 0 ;
375 }
376 }
377
378 #else
379
380 /* ---------------------------------------------------------------------- */
381 /* dynamic supernodal version: supernodes detected dynamically */
382 /* ---------------------------------------------------------------------- */
383
384 double G0 [RANK], G1 [RANK], G2 [RANK], G3 [RANK] ;
385 double Z0 [RANK], Z1 [RANK], Z2 [RANK], Z3 [RANK] ;
386 double *W0, *W1, *W2, *W3, *Lx ;
387 Int *Li, *Lp, *Lnz ;
388 Int j1, j2, j3, p0, p1, p2, p3, parent, lnz, pend, k ;
389 Int use_dbound = IS_GT_ZERO (Common->dbound) ;
390
391 Li = L->i ;
392 Lx = L->x ;
393 Lp = L->p ;
394 Lnz = L->nz ;
395
396 /* walk up the etree from node j to its ancestor e */
397 for ( ; j <= e ; j = parent)
398 {
399
400 p0 = Lp [j] ; /* col j is Li,Lx [p0 ... p0+lnz-1] */
401 lnz = Lnz [j] ;
402
403 W0 = W + WDIM * j ; /* pointer to row j of W */
404 pend = p0 + lnz ;
405
406 /* for k = 0 to RANK-1 do: */
407 #define DO(k) Z0 [k] = W0 [k] ;
408 FOR_ALL_K
409 #undef DO
410
411 /* for k = 0 to RANK-1 do: */
412 #define DO(k) W0 [k] = 0 ;
413 FOR_ALL_K
414 #undef DO
415
416 /* update D (j,j) */
417 ALPHA_GAMMA (Lx [p0], Alpha, G0, Z0) ;
418 p0++ ;
419
420 /* determine how many columns of L to update at the same time */
421 parent = (lnz > 1) ? (Li [p0]) : Int_max ;
422 if (parent <= e && lnz == Lnz [parent] + 1)
423 {
424
425 /* -------------------------------------------------------------- */
426 /* node j and its parent j1 can be updated at the same time */
427 /* -------------------------------------------------------------- */
428
429 j1 = parent ;
430 j2 = (lnz > 2) ? (Li [p0+1]) : Int_max ;
431 j3 = (lnz > 3) ? (Li [p0+2]) : Int_max ;
432 W1 = W + WDIM * j1 ; /* pointer to row j1 of W */
433 p1 = Lp [j1] ;
434
435 /* for k = 0 to RANK-1 do: */
436 #define DO(k) Z1 [k] = W1 [k] ;
437 FOR_ALL_K
438 #undef DO
439
440 /* for k = 0 to RANK-1 do: */
441 #define DO(k) W1 [k] = 0 ;
442 FOR_ALL_K
443 #undef DO
444
445 /* update L (j1,j) */
446 {
447 double lx = Lx [p0] ;
448
449 /* for k = 0 to RANK-1 do: */
450 #define DO(k) \
451 Z1 [k] -= Z0 [k] * lx ; \
452 lx -= G0 [k] * Z1 [k] ;
453 FOR_ALL_K
454 #undef DO
455
456 Lx [p0++] = lx ;
457 }
458
459 /* update D (j1,j1) */
460 ALPHA_GAMMA (Lx [p1], Alpha, G1, Z1) ;
461 p1++ ;
462
463 /* -------------------------------------------------------------- */
464 /* update 2 or 4 columns of L */
465 /* -------------------------------------------------------------- */
466
467 if ((j2 <= e) && /* j2 in the current path */
468 (j3 <= e) && /* j3 in the current path */
469 (lnz == Lnz [j2] + 2) && /* column j2 matches */
470 (lnz == Lnz [j3] + 3)) /* column j3 matches */
471 {
472
473 /* ---------------------------------------------------------- */
474 /* update 4 columns of L */
475 /* ---------------------------------------------------------- */
476
477 /* p0 and p1 currently point to row j2 in cols j and j1 of L */
478
479 parent = (lnz > 4) ? (Li [p0+2]) : Int_max ;
480 W2 = W + WDIM * j2 ; /* pointer to row j2 of W */
481 W3 = W + WDIM * j3 ; /* pointer to row j3 of W */
482 p2 = Lp [j2] ;
483 p3 = Lp [j3] ;
484
485 /* for k = 0 to RANK-1 do: */
486 #define DO(k) Z2 [k] = W2 [k] ;
487 FOR_ALL_K
488 #undef DO
489
490 /* for k = 0 to RANK-1 do: */
491 #define DO(k) Z3 [k] = W3 [k] ;
492 FOR_ALL_K
493 #undef DO
494
495 /* for k = 0 to RANK-1 do: */
496 #define DO(k) W2 [k] = 0 ;
497 FOR_ALL_K
498 #undef DO
499
500 /* for k = 0 to RANK-1 do: */
501 #define DO(k) W3 [k] = 0 ;
502 FOR_ALL_K
503 #undef DO
504
505 /* update L (j2,j) and update L (j2,j1) */
506 {
507 double lx [2] ;
508 lx [0] = Lx [p0] ;
509 lx [1] = Lx [p1] ;
510
511 /* for k = 0 to RANK-1 do: */
512 #define DO(k) \
513 Z2 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z2 [k] ; \
514 Z2 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z2 [k] ;
515 FOR_ALL_K
516 #undef DO
517
518 Lx [p0++] = lx [0] ;
519 Lx [p1++] = lx [1] ;
520 }
521
522 /* update D (j2,j2) */
523 ALPHA_GAMMA (Lx [p2], Alpha, G2, Z2) ;
524 p2++ ;
525
526 /* update L (j3,j), L (j3,j1), and L (j3,j2) */
527 {
528 double lx [3] ;
529 lx [0] = Lx [p0] ;
530 lx [1] = Lx [p1] ;
531 lx [2] = Lx [p2] ;
532
533 /* for k = 0 to RANK-1 do: */
534 #define DO(k) \
535 Z3 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z3 [k] ; \
536 Z3 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z3 [k] ; \
537 Z3 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * Z3 [k] ;
538 FOR_ALL_K
539 #undef DO
540
541 Lx [p0++] = lx [0] ;
542 Lx [p1++] = lx [1] ;
543 Lx [p2++] = lx [2] ;
544 }
545
546 /* update D (j3,j3) */
547 ALPHA_GAMMA (Lx [p3], Alpha, G3, Z3) ;
548 p3++ ;
549
550 /* each iteration updates L (i, [j j1 j2 j3]) */
551 for ( ; p0 < pend ; p0++, p1++, p2++, p3++)
552 {
553 double lx [4], *w0 ;
554 lx [0] = Lx [p0] ;
555 lx [1] = Lx [p1] ;
556 lx [2] = Lx [p2] ;
557 lx [3] = Lx [p3] ;
558 w0 = W + WDIM * Li [p0] ;
559
560 /* for k = 0 to RANK-1 do: */
561 #define DO(k) \
562 w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
563 w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ; \
564 w0 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * w0 [k] ; \
565 w0 [k] -= Z3 [k] * lx [3] ; lx [3] -= G3 [k] * w0 [k] ;
566 FOR_ALL_K
567 #undef DO
568
569 Lx [p0] = lx [0] ;
570 Lx [p1] = lx [1] ;
571 Lx [p2] = lx [2] ;
572 Lx [p3] = lx [3] ;
573 }
574 }
575 else
576 {
577
578 /* ---------------------------------------------------------- */
579 /* update 2 columns of L */
580 /* ---------------------------------------------------------- */
581
582 parent = j2 ;
583
584 /* cleanup iteration if length is odd */
585 if ((lnz - 2) % 2)
586 {
587 double lx [2] , *w0 ;
588 lx [0] = Lx [p0] ;
589 lx [1] = Lx [p1] ;
590 w0 = W + WDIM * Li [p0] ;
591
592 /* for k = 0 to RANK-1 do: */
593 #define DO(k) \
594 w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
595 w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ;
596 FOR_ALL_K
597 #undef DO
598
599 Lx [p0++] = lx [0] ;
600 Lx [p1++] = lx [1] ;
601 }
602
603 for ( ; p0 < pend ; p0 += 2, p1 += 2)
604 {
605 double lx [2][2], w [2], *w0, *w1 ;
606 lx [0][0] = Lx [p0 ] ;
607 lx [1][0] = Lx [p0+1] ;
608 lx [0][1] = Lx [p1 ] ;
609 lx [1][1] = Lx [p1+1] ;
610 w0 = W + WDIM * Li [p0 ] ;
611 w1 = W + WDIM * Li [p0+1] ;
612
613 /* for k = 0 to RANK-1 do: */
614 #define DO(k) \
615 w [0] = w0 [k] - Z0 [k] * lx [0][0] ; \
616 w [1] = w1 [k] - Z0 [k] * lx [1][0] ; \
617 lx [0][0] -= G0 [k] * w [0] ; \
618 lx [1][0] -= G0 [k] * w [1] ; \
619 w0 [k] = w [0] -= Z1 [k] * lx [0][1] ; \
620 w1 [k] = w [1] -= Z1 [k] * lx [1][1] ; \
621 lx [0][1] -= G1 [k] * w [0] ; \
622 lx [1][1] -= G1 [k] * w [1] ;
623 FOR_ALL_K
624 #undef DO
625
626 Lx [p0 ] = lx [0][0] ;
627 Lx [p0+1] = lx [1][0] ;
628 Lx [p1 ] = lx [0][1] ;
629 Lx [p1+1] = lx [1][1] ;
630 }
631 }
632 }
633 else
634 {
635
636 /* -------------------------------------------------------------- */
637 /* update one column of L */
638 /* -------------------------------------------------------------- */
639
640 /* cleanup iteration if length is not a multiple of 4 */
641 switch ((lnz - 1) % 4)
642 {
643 case 1:
644 {
645 double lx , *w0 ;
646 lx = Lx [p0] ;
647 w0 = W + WDIM * Li [p0] ;
648
649 /* for k = 0 to RANK-1 do: */
650 #define DO(k) \
651 w0 [k] -= Z0 [k] * lx ; lx -= G0 [k] * w0 [k] ;
652 FOR_ALL_K
653 #undef DO
654
655 Lx [p0++] = lx ;
656 }
657 break ;
658
659 case 2:
660 {
661 double lx [2], *w0, *w1 ;
662 lx [0] = Lx [p0 ] ;
663 lx [1] = Lx [p0+1] ;
664 w0 = W + WDIM * Li [p0 ] ;
665 w1 = W + WDIM * Li [p0+1] ;
666
667 /* for k = 0 to RANK-1 do: */
668 #define DO(k) \
669 w0 [k] -= Z0 [k] * lx [0] ; \
670 w1 [k] -= Z0 [k] * lx [1] ; \
671 lx [0] -= G0 [k] * w0 [k] ; \
672 lx [1] -= G0 [k] * w1 [k] ;
673 FOR_ALL_K
674 #undef DO
675
676 Lx [p0++] = lx [0] ;
677 Lx [p0++] = lx [1] ;
678 }
679 break ;
680
681 case 3:
682 {
683 double lx [3], *w0, *w1, *w2 ;
684 lx [0] = Lx [p0 ] ;
685 lx [1] = Lx [p0+1] ;
686 lx [2] = Lx [p0+2] ;
687 w0 = W + WDIM * Li [p0 ] ;
688 w1 = W + WDIM * Li [p0+1] ;
689 w2 = W + WDIM * Li [p0+2] ;
690
691 /* for k = 0 to RANK-1 do: */
692 #define DO(k) \
693 w0 [k] -= Z0 [k] * lx [0] ; \
694 w1 [k] -= Z0 [k] * lx [1] ; \
695 w2 [k] -= Z0 [k] * lx [2] ; \
696 lx [0] -= G0 [k] * w0 [k] ; \
697 lx [1] -= G0 [k] * w1 [k] ; \
698 lx [2] -= G0 [k] * w2 [k] ;
699 FOR_ALL_K
700 #undef DO
701
702 Lx [p0++] = lx [0] ;
703 Lx [p0++] = lx [1] ;
704 Lx [p0++] = lx [2] ;
705 }
706 }
707
708 for ( ; p0 < pend ; p0 += 4)
709 {
710 double lx [4], *w0, *w1, *w2, *w3 ;
711 lx [0] = Lx [p0 ] ;
712 lx [1] = Lx [p0+1] ;
713 lx [2] = Lx [p0+2] ;
714 lx [3] = Lx [p0+3] ;
715 w0 = W + WDIM * Li [p0 ] ;
716 w1 = W + WDIM * Li [p0+1] ;
717 w2 = W + WDIM * Li [p0+2] ;
718 w3 = W + WDIM * Li [p0+3] ;
719
720 /* for k = 0 to RANK-1 do: */
721 #define DO(k) \
722 w0 [k] -= Z0 [k] * lx [0] ; \
723 w1 [k] -= Z0 [k] * lx [1] ; \
724 w2 [k] -= Z0 [k] * lx [2] ; \
725 w3 [k] -= Z0 [k] * lx [3] ; \
726 lx [0] -= G0 [k] * w0 [k] ; \
727 lx [1] -= G0 [k] * w1 [k] ; \
728 lx [2] -= G0 [k] * w2 [k] ; \
729 lx [3] -= G0 [k] * w3 [k] ;
730 FOR_ALL_K
731 #undef DO
732
733 Lx [p0 ] = lx [0] ;
734 Lx [p0+1] = lx [1] ;
735 Lx [p0+2] = lx [2] ;
736 Lx [p0+3] = lx [3] ;
737 }
738 }
739 }
740 #endif
741 }
742 /* prepare this file for another inclusion in t_cholmod_updown.c: */
743 #undef RANK
744