1 /* ========================================================================== */
2 /* === Modify/t_cholmod_updown_numkr ======================================== */
3 /* ========================================================================== */
4 
5 /* -----------------------------------------------------------------------------
6  * CHOLMOD/Modify Module.  Copyright (C) 2005-2006,
7  * Timothy A. Davis and William W. Hager.
8  * http://www.suitesparse.com
9  * -------------------------------------------------------------------------- */
10 
11 /* Supernodal numerical update/downdate of rank K = RANK, along a single path.
12  * This routine operates on a simplicial factor, but operates on adjacent
13  * columns of L that would fit within a single supernode.  "Adjacent" means
14  * along a single path in the elimination tree; they may or may not be
15  * adjacent in the matrix L.
16  *
17  * external defines:  NUMERIC, WDIM, RANK.
18  *
19  * WDIM is 1, 2, 4, or 8.  RANK can be 1 to WDIM.
20  *
21  * A simple method is included (#define SIMPLE).  The code works, but is slow.
22  * It is meant only to illustrate what this routine is doing.
23  *
24  * A rank-K update proceeds along a single path, using single-column, dual-
25  * column, or quad-column updates of L.  If a column j and the next column
26  * in the path (its parent) do not have the same nonzero pattern, a single-
27  * column update is used.  If they do, but the 3rd and 4th column from j do
28  * not have the same pattern, a dual-column update is used, in which the two
29  * columns are treated as if they were a single supernode of two columns.  If
30  * there are 4 columns in the path that all have the same nonzero pattern, then
31  * a quad-column update is used.  All three kinds of updates can be used along
32  * a single path, in a single call to this function.
33  *
34  * Single-column update:
35  *
36  *	When updating a single column of L, each iteration of the for loop,
37  *	below, processes four rows of W (all columns involved) and one column
38  *	of L.  Suppose we have a rank-5 update, and columns 2 through 6 of W
39  *	are involved.  In this case, W in this routine is a pointer to column
40  *	2 of the matrix W in the caller.  W (in the caller, shown as 'W') is
41  *	held in row-major order, and is 8-by-n (a dense matrix storage format),
42  *	but shown below in column form to match the column of L.  Suppose there
43  *	are 13 nonzero entries in column 27 of L, with row indices 27 (the
44  *	diagonal, D), 28, 30, 31, 42, 43, 44, 50, 51, 67, 81, 83, and 84.  This
45  *	pattern is held in Li [Lp [27] ... Lp [27 + Lnz [27] - 1], where
46  *	Lnz [27] = 13.   The modification of the current column j of L is done
47  *	in the following order.  A dot (.) means the entry of W is not accessed.
48  *
49  *	W0 points to row 27 of W, and G is a 1-by-8 temporary vector.
50  *
51  *		     G[0]        G[4]
52  *	G	      x  x  x  x  x  .  .  .
53  *
54  *		      W0
55  *		      |
56  *		      v
57  *	27      .  .  x  x  x  x  x  .   W0 points to W (27,2)
58  *
59  *
60  *	row    'W'    W                      column j = 27
61  *	|       |     |                      of L
62  *	v       v     v                      |
63  *	first iteration of for loop:         v
64  *
65  *	28      .  .  1  5  9 13 17  .       x
66  *	30      .  .  2  6 10 14 18  .       x
67  *	31      .  .  3  7 11 15 19  .       x
68  *	42      .  .  4  8 12 16 20  .       x
69  *
70  *	second iteration of for loop:
71  *
72  *	43      .  .  1  5  9 13 17  .       x
73  *	44      .  .  2  6 10 14 18  .       x
74  *	50      .  .  3  7 11 15 19  .       x
75  *	51      .  .  4  8 12 16 20  .       x
76  *
77  *	third iteration of for loop:
78  *
79  *	67      .  .  1  5  9 13 17  .       x
80  *	81      .  .  2  6 10 14 18  .       x
81  *	83      .  .  3  7 11 15 19  .       x
82  *	84      .  .  4  8 12 16 20  .       x
83  *
84  *	If the number of offdiagonal nonzeros in column j of L is not divisible
85  *	by 4, then the switch-statement does the work for the first nz % 4 rows.
86  *
87  * Dual-column update:
88  *
89  *	In this case, two columns of L that are adjacent in the path are being
90  *	updated, by 1 to 8 columns of W.  Suppose columns j=27 and j=28 are
91  *	adjacent columns in the path (they need not be j and j+1).  Two rows
92  *	of G and W are used as coefficients during the update: (G0, G1) and
93  *	(W0, W1).
94  *
95  *	G0	      x  x  x  x  x  .  .  .
96  *	G1	      x  x  x  x  x  .  .  .
97  *
98  *	27      .  .  x  x  x  x  x  .   W0 points to W (27,2)
99  *	28      .  .  x  x  x  x  x  .   W1 points to W (28,2)
100  *
101  *
102  *	row    'W'    W0,W1                  column j = 27
103  *	|       |     |                      of L
104  *	v       v     v                      |
105  *					     | |-- column j = 28 of L
106  *					     v v
107  *	update L (j1,j):
108  *
109  *	28      .  .  1  2  3  4  5  .       x -    ("-" is not stored in L)
110  *
111  *	cleanup iteration since length is odd:
112  *
113  *	30      .  .  1  2  3  4  5  .       x x
114  *
115  *	then each iteration does two rows of both columns of L:
116  *
117  *	31      .  .  1  3  5  7  9  .       x x
118  *	42      .  .  2  4  6  8 10  .       x x
119  *
120  *	43      .  .  1  3  5  7  9  .       x x
121  *	44      .  .  2  4  6  8 10  .       x x
122  *
123  *	50      .  .  1  3  5  7  9  .       x x
124  *	51      .  .  2  4  6  8 10  .       x x
125  *
126  *	67      .  .  1  3  5  7  9  .       x x
127  *	81      .  .  2  4  6  8 10  .       x x
128  *
129  *	83      .  .  1  3  5  7  9  .       x x
130  *	84      .  .  2  4  6  8 10  .       x x
131  *
132  *	If the number of offdiagonal nonzeros in column j of L is not even,
133  *	then the cleanup iteration does the work for the first row.
134  *
135  * Quad-column update:
136  *
137  *	In this case, four columns of L that are adjacent in the path are being
138  *	updated, by 1 to 8 columns of W.  Suppose columns j=27, 28, 30, and 31
139  *	are adjacent columns in the path (they need not be j, j+1, ...).  Four
140  *	rows of G and W are used as coefficients during the update: (G0 through
141  *	G3) and (W0 through W3).  j=27, j1=28, j2=30, and j3=31.
142  *
143  *	G0	      x  x  x  x  x  .  .  .
144  *	G1	      x  x  x  x  x  .  .  .
145  *	G3	      x  x  x  x  x  .  .  .
146  *	G4	      x  x  x  x  x  .  .  .
147  *
148  *	27      .  .  x  x  x  x  x  .   W0 points to W (27,2)
149  *	28      .  .  x  x  x  x  x  .   W1 points to W (28,2)
150  *	30      .  .  x  x  x  x  x  .   W2 points to W (30,2)
151  *	31      .  .  x  x  x  x  x  .   W3 points to W (31,2)
152  *
153  *
154  *	row    'W'    W0,W1,..               column j = 27
155  *	|       |     |                      of L
156  *	v       v     v                      |
157  *					     | |-- column j = 28 of L
158  *					     | | |-- column j = 30 of L
159  *					     | | | |-- column j =  31 of L
160  *					     v v v v
161  *	update L (j1,j):
162  *	28      .  .  1  2  3  4  5  .       x - - -
163  *
164  *	update L (j2,j):
165  *	30      .  .  1  2  3  4  5  .       # x - -	 (# denotes modified)
166  *
167  *	update L (j2,j1)
168  *	30      .  .  1  2  3  4  5  .       x # - -
169  *
170  *	update L (j3,j)
171  *	31      .  .  1  2  3  4  5  .       # x x -
172  *
173  *	update L (j3,j1)
174  *	31      .  .  1  2  3  4  5  .       x # x -
175  *
176  *	update L (j3,j2)
177  *	31      .  .  1  2  3  4  5  .       x x # -
178  *
179  *	cleanup iteration since length is odd:
180  *	42      .  .  1  2  3  4  5  .       x x x x
181  *
182  *
183  * ----- CHOLMOD v1.1.1 did the following --------------------------------------
184  *	then each iteration does two rows of all four colummns of L:
185  *
186  *	43      .  .  1  3  5  7  9  .       x x x x
187  *	44      .  .  2  4  6  8 10  .       x x x x
188  *
189  *	50      .  .  1  3  5  7  9  .       x x x x
190  *	51      .  .  2  4  6  8 10  .       x x x x
191  *
192  *	67      .  .  1  3  5  7  9  .       x x x x
193  *	81      .  .  2  4  6  8 10  .       x x x x
194  *
195  *	83      .  .  1  3  5  7  9  .       x x x x
196  *	84      .  .  2  4  6  8 10  .       x x x x
197  *
198  * ----- CHOLMOD v1.2.0 does the following -------------------------------------
199  *	then each iteration does one rows of all four colummns of L:
200  *
201  *	43      .  .  1  2  3  4  5  .       x x x x
202  *	44      .  .  1  2  3  4  5  .       x x x x
203  *	50      .  .  1  3  5  4  5  .       x x x x
204  *	51      .  .  1  2  3  4  5  .       x x x x
205  *	67      .  .  1  3  5  4  5  .       x x x x
206  *	81      .  .  1  2  3  4  5  .       x x x x
207  *	83      .  .  1  3  5  4  5  .       x x x x
208  *	84      .  .  1  2  3  4  5  .       x x x x
209  *
210  * This file is included in t_cholmod_updown.c, only.
211  * It is not compiled separately.  It contains no user-callable routines.
212  *
213  * workspace: Xwork (WDIM*nrow)
214  */
215 
216 /* ========================================================================== */
217 /* === loop unrolling macros ================================================ */
218 /* ========================================================================== */
219 
220 #undef RANK1
221 #undef RANK2
222 #undef RANK3
223 #undef RANK4
224 #undef RANK5
225 #undef RANK6
226 #undef RANK7
227 #undef RANK8
228 
229 #define RANK1(statement) statement
230 
231 #if RANK < 2
232 #define RANK2(statement)
233 #else
234 #define RANK2(statement) statement
235 #endif
236 
237 #if RANK < 3
238 #define RANK3(statement)
239 #else
240 #define RANK3(statement) statement
241 #endif
242 
243 #if RANK < 4
244 #define RANK4(statement)
245 #else
246 #define RANK4(statement) statement
247 #endif
248 
249 #if RANK < 5
250 #define RANK5(statement)
251 #else
252 #define RANK5(statement) statement
253 #endif
254 
255 #if RANK < 6
256 #define RANK6(statement)
257 #else
258 #define RANK6(statement) statement
259 #endif
260 
261 #if RANK < 7
262 #define RANK7(statement)
263 #else
264 #define RANK7(statement) statement
265 #endif
266 
267 #if RANK < 8
268 #define RANK8(statement)
269 #else
270 #define RANK8(statement) statement
271 #endif
272 
273 #define FOR_ALL_K \
274     RANK1 (DO (0)) \
275     RANK2 (DO (1)) \
276     RANK3 (DO (2)) \
277     RANK4 (DO (3)) \
278     RANK5 (DO (4)) \
279     RANK6 (DO (5)) \
280     RANK7 (DO (6)) \
281     RANK8 (DO (7))
282 
283 /* ========================================================================== */
284 /* === alpha/gamma ========================================================== */
285 /* ========================================================================== */
286 
287 #undef ALPHA_GAMMA
288 
289 #define ALPHA_GAMMA(Dj,Alpha,Gamma,W) \
290 { \
291     double dj = Dj ; \
292     if (update) \
293     { \
294 	for (k = 0 ; k < RANK ; k++) \
295 	{ \
296 	    double w = W [k] ; \
297 	    double alpha = Alpha [k] ; \
298 	    double a = alpha + (w * w) / dj ; \
299 	    dj *= a ; \
300 	    Alpha [k] = a ; \
301 	    Gamma [k] = (- w / dj) ; \
302 	    dj /= alpha ; \
303 	} \
304     } \
305     else \
306     { \
307 	for (k = 0 ; k < RANK ; k++) \
308 	{ \
309 	    double w = W [k] ; \
310 	    double alpha = Alpha [k] ; \
311 	    double a = alpha - (w * w) / dj ; \
312 	    dj *= a ; \
313 	    Alpha [k] = a ; \
314 	    Gamma [k] = w / dj ; \
315 	    dj /= alpha ; \
316 	} \
317     } \
318     Dj = ((use_dbound) ? (CHOLMOD(dbound) (dj, Common)) : (dj)) ; \
319 }
320 
321 /* ========================================================================== */
322 /* === numeric update/downdate along one path =============================== */
323 /* ========================================================================== */
324 
NUMERIC(WDIM,RANK)325 static void NUMERIC (WDIM, RANK)
326 (
327     int update,		/* TRUE for update, FALSE for downdate */
328     Int j,		/* first column in the path */
329     Int e,		/* last column in the path */
330     double Alpha [ ],	/* alpha, for each column of W */
331     double W [ ],	/* W is an n-by-WDIM array, stored in row-major order */
332     cholmod_factor *L,	/* with unit diagonal (diagonal not stored) */
333     cholmod_common *Common
334 )
335 {
336 
337 #ifdef SIMPLE
338 #define w(row,col) W [WDIM*(row) + (col)]
339 
340     /* ---------------------------------------------------------------------- */
341     /* concise but slow version for illustration only */
342     /* ---------------------------------------------------------------------- */
343 
344     double Gamma [WDIM] ;
345     double *Lx ;
346     Int *Li, *Lp, *Lnz ;
347     Int p, k ;
348     Int use_dbound = IS_GT_ZERO (Common->dbound) ;
349 
350     Li = L->i ;
351     Lx = L->x ;
352     Lp = L->p ;
353     Lnz = L->nz ;
354 
355     /* walk up the etree from node j to its ancestor e */
356     for ( ; j <= e ; j = (Lnz [j] > 1) ? (Li [Lp [j] + 1]) : Int_max)
357     {
358 	/* update the diagonal entry D (j,j) with each column of W */
359 	ALPHA_GAMMA (Lx [Lp [j]], Alpha, Gamma, (&(w (j,0)))) ;
360 	/* update column j of L */
361 	for (p = Lp [j] + 1 ; p < Lp [j] + Lnz [j] ; p++)
362 	{
363 	    /* update row Li [p] of column j of L with each column of W */
364 	    Int i = Li [p] ;
365 	    for (k = 0 ; k < RANK ; k++)
366 	    {
367 		w (i,k) -= w (j,k) * Lx [p] ;
368 		Lx [p] -= Gamma [k] * w (i,k) ;
369 	    }
370 	}
371 	/* clear workspace W */
372 	for (k = 0 ; k < RANK ; k++)
373 	{
374 	    w (j,k) = 0 ;
375 	}
376     }
377 
378 #else
379 
380     /* ---------------------------------------------------------------------- */
381     /* dynamic supernodal version: supernodes detected dynamically */
382     /* ---------------------------------------------------------------------- */
383 
384     double G0 [RANK], G1 [RANK], G2 [RANK], G3 [RANK] ;
385     double Z0 [RANK], Z1 [RANK], Z2 [RANK], Z3 [RANK] ;
386     double *W0, *W1, *W2, *W3, *Lx ;
387     Int *Li, *Lp, *Lnz ;
388     Int j1, j2, j3, p0, p1, p2, p3, parent, lnz, pend, k ;
389     Int use_dbound = IS_GT_ZERO (Common->dbound) ;
390 
391     Li = L->i ;
392     Lx = L->x ;
393     Lp = L->p ;
394     Lnz = L->nz ;
395 
396     /* walk up the etree from node j to its ancestor e */
397     for ( ; j <= e ; j = parent)
398     {
399 
400 	p0 = Lp [j] ;		/* col j is Li,Lx [p0 ... p0+lnz-1] */
401 	lnz = Lnz [j] ;
402 
403 	W0 = W + WDIM * j ;	/* pointer to row j of W */
404 	pend = p0 + lnz ;
405 
406 	/* for k = 0 to RANK-1 do: */
407 	#define DO(k) Z0 [k] = W0 [k] ;
408 	FOR_ALL_K
409 	#undef DO
410 
411 	/* for k = 0 to RANK-1 do: */
412 	#define DO(k) W0 [k] = 0 ;
413 	FOR_ALL_K
414 	#undef DO
415 
416 	/* update D (j,j) */
417 	ALPHA_GAMMA (Lx [p0], Alpha, G0, Z0) ;
418 	p0++ ;
419 
420 	/* determine how many columns of L to update at the same time */
421 	parent = (lnz > 1) ? (Li [p0]) : Int_max ;
422 	if (parent <= e && lnz == Lnz [parent] + 1)
423 	{
424 
425 	    /* -------------------------------------------------------------- */
426 	    /* node j and its parent j1 can be updated at the same time */
427 	    /* -------------------------------------------------------------- */
428 
429 	    j1 = parent ;
430 	    j2 = (lnz > 2) ? (Li [p0+1]) : Int_max ;
431 	    j3 = (lnz > 3) ? (Li [p0+2]) : Int_max ;
432 	    W1 = W + WDIM * j1 ;	/* pointer to row j1 of W */
433 	    p1 = Lp [j1] ;
434 
435 	    /* for k = 0 to RANK-1 do: */
436 	    #define DO(k) Z1 [k] = W1 [k] ;
437 	    FOR_ALL_K
438 	    #undef DO
439 
440 	    /* for k = 0 to RANK-1 do: */
441 	    #define DO(k) W1 [k] = 0 ;
442 	    FOR_ALL_K
443 	    #undef DO
444 
445 	    /* update L (j1,j) */
446 	    {
447 		double lx = Lx [p0] ;
448 
449 		/* for k = 0 to RANK-1 do: */
450 		#define DO(k) \
451 		    Z1 [k] -= Z0 [k] * lx ; \
452 		    lx -= G0 [k] * Z1 [k] ;
453 		FOR_ALL_K
454 		#undef DO
455 
456 		Lx [p0++] = lx ;
457 	    }
458 
459 	    /* update D (j1,j1) */
460 	    ALPHA_GAMMA (Lx [p1], Alpha, G1, Z1) ;
461 	    p1++ ;
462 
463 	    /* -------------------------------------------------------------- */
464 	    /* update 2 or 4 columns of L */
465 	    /* -------------------------------------------------------------- */
466 
467 	    if ((j2 <= e) &&			/* j2 in the current path */
468 		(j3 <= e) &&			/* j3 in the current path */
469 		(lnz == Lnz [j2] + 2) &&	/* column j2 matches */
470 		(lnz == Lnz [j3] + 3))		/* column j3 matches */
471 	    {
472 
473 		/* ---------------------------------------------------------- */
474 		/* update 4 columns of L */
475 		/* ---------------------------------------------------------- */
476 
477 		/* p0 and p1 currently point to row j2 in cols j and j1 of L */
478 
479 		parent = (lnz > 4) ? (Li [p0+2]) : Int_max ;
480 		W2 = W + WDIM * j2 ;	    /* pointer to row j2 of W */
481 		W3 = W + WDIM * j3 ;	    /* pointer to row j3 of W */
482 		p2 = Lp [j2] ;
483 		p3 = Lp [j3] ;
484 
485 		/* for k = 0 to RANK-1 do: */
486 		#define DO(k) Z2 [k] = W2 [k] ;
487 		FOR_ALL_K
488 		#undef DO
489 
490 		/* for k = 0 to RANK-1 do: */
491 		#define DO(k) Z3 [k] = W3 [k] ;
492 		FOR_ALL_K
493 		#undef DO
494 
495 		/* for k = 0 to RANK-1 do: */
496 		#define DO(k) W2 [k] = 0 ;
497 		FOR_ALL_K
498 		#undef DO
499 
500 		/* for k = 0 to RANK-1 do: */
501 		#define DO(k) W3 [k] = 0 ;
502 		FOR_ALL_K
503 		#undef DO
504 
505 		/* update L (j2,j) and update L (j2,j1) */
506 		{
507 		    double lx [2] ;
508 		    lx [0] = Lx [p0] ;
509 		    lx [1] = Lx [p1] ;
510 
511 		    /* for k = 0 to RANK-1 do: */
512 		    #define DO(k) \
513 		    Z2 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z2 [k] ; \
514 		    Z2 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z2 [k] ;
515 		    FOR_ALL_K
516 		    #undef DO
517 
518 		    Lx [p0++] = lx [0] ;
519 		    Lx [p1++] = lx [1] ;
520 		}
521 
522 		/* update D (j2,j2) */
523 		ALPHA_GAMMA (Lx [p2], Alpha, G2, Z2) ;
524 		p2++ ;
525 
526 		/* update L (j3,j), L (j3,j1), and L (j3,j2) */
527 		{
528 		    double lx [3] ;
529 		    lx [0] = Lx [p0] ;
530 		    lx [1] = Lx [p1] ;
531 		    lx [2] = Lx [p2] ;
532 
533 		    /* for k = 0 to RANK-1 do: */
534 		    #define DO(k) \
535 		    Z3 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z3 [k] ; \
536 		    Z3 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z3 [k] ; \
537 		    Z3 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * Z3 [k] ;
538 		    FOR_ALL_K
539 		    #undef DO
540 
541 		    Lx [p0++] = lx [0] ;
542 		    Lx [p1++] = lx [1] ;
543 		    Lx [p2++] = lx [2] ;
544 		}
545 
546 		/* update D (j3,j3) */
547 		ALPHA_GAMMA (Lx [p3], Alpha, G3, Z3) ;
548 		p3++ ;
549 
550 		/* each iteration updates L (i, [j j1 j2 j3]) */
551 		for ( ; p0 < pend ; p0++, p1++, p2++, p3++)
552 		{
553 		    double lx [4], *w0 ;
554 		    lx [0] = Lx [p0] ;
555 		    lx [1] = Lx [p1] ;
556 		    lx [2] = Lx [p2] ;
557 		    lx [3] = Lx [p3] ;
558 		    w0 = W + WDIM * Li [p0] ;
559 
560 		    /* for k = 0 to RANK-1 do: */
561 		    #define DO(k) \
562 		    w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
563 		    w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ; \
564 		    w0 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * w0 [k] ; \
565 		    w0 [k] -= Z3 [k] * lx [3] ; lx [3] -= G3 [k] * w0 [k] ;
566 		    FOR_ALL_K
567 		    #undef DO
568 
569 		    Lx [p0] = lx [0] ;
570 		    Lx [p1] = lx [1] ;
571 		    Lx [p2] = lx [2] ;
572 		    Lx [p3] = lx [3] ;
573 		}
574 	    }
575 	    else
576 	    {
577 
578 		/* ---------------------------------------------------------- */
579 		/* update 2 columns of L */
580 		/* ---------------------------------------------------------- */
581 
582 		parent = j2 ;
583 
584 		/* cleanup iteration if length is odd */
585 		if ((lnz - 2) % 2)
586 		{
587 		    double lx [2] , *w0 ;
588 		    lx [0] = Lx [p0] ;
589 		    lx [1] = Lx [p1] ;
590 		    w0 = W + WDIM * Li [p0] ;
591 
592 		    /* for k = 0 to RANK-1 do: */
593 		    #define DO(k) \
594 		    w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
595 		    w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ;
596 		    FOR_ALL_K
597 		    #undef DO
598 
599 		    Lx [p0++] = lx [0] ;
600 		    Lx [p1++] = lx [1] ;
601 		}
602 
603 		for ( ; p0 < pend ; p0 += 2, p1 += 2)
604 		{
605 		    double lx [2][2], w [2], *w0, *w1 ;
606 		    lx [0][0] = Lx [p0  ] ;
607 		    lx [1][0] = Lx [p0+1] ;
608 		    lx [0][1] = Lx [p1  ] ;
609 		    lx [1][1] = Lx [p1+1] ;
610 		    w0 = W + WDIM * Li [p0  ] ;
611 		    w1 = W + WDIM * Li [p0+1] ;
612 
613 		    /* for k = 0 to RANK-1 do: */
614 		    #define DO(k) \
615 		    w [0] = w0 [k] - Z0 [k] * lx [0][0] ; \
616 		    w [1] = w1 [k] - Z0 [k] * lx [1][0] ; \
617 		    lx [0][0] -= G0 [k] * w [0] ; \
618 		    lx [1][0] -= G0 [k] * w [1] ; \
619 		    w0 [k] = w [0] -= Z1 [k] * lx [0][1] ; \
620 		    w1 [k] = w [1] -= Z1 [k] * lx [1][1] ; \
621 		    lx [0][1] -= G1 [k] * w [0] ; \
622 		    lx [1][1] -= G1 [k] * w [1] ;
623 		    FOR_ALL_K
624 		    #undef DO
625 
626 		    Lx [p0  ] = lx [0][0] ;
627 		    Lx [p0+1] = lx [1][0] ;
628 		    Lx [p1  ] = lx [0][1] ;
629 		    Lx [p1+1] = lx [1][1] ;
630 		}
631 	    }
632 	}
633 	else
634 	{
635 
636 	    /* -------------------------------------------------------------- */
637 	    /* update one column of L */
638 	    /* -------------------------------------------------------------- */
639 
640 	    /* cleanup iteration if length is not a multiple of 4 */
641 	    switch ((lnz - 1) % 4)
642 	    {
643 		case 1:
644 		{
645 		    double lx , *w0 ;
646 		    lx = Lx [p0] ;
647 		    w0 = W + WDIM * Li [p0] ;
648 
649 		    /* for k = 0 to RANK-1 do: */
650 		    #define DO(k) \
651 		    w0 [k] -= Z0 [k] * lx ; lx -= G0 [k] * w0 [k] ;
652 		    FOR_ALL_K
653 		    #undef DO
654 
655 		    Lx [p0++] = lx ;
656 		}
657 		break ;
658 
659 		case 2:
660 		{
661 		    double lx [2], *w0, *w1 ;
662 		    lx [0] = Lx [p0  ] ;
663 		    lx [1] = Lx [p0+1] ;
664 		    w0 = W + WDIM * Li [p0  ] ;
665 		    w1 = W + WDIM * Li [p0+1] ;
666 
667 		    /* for k = 0 to RANK-1 do: */
668 		    #define DO(k) \
669 		    w0 [k] -= Z0 [k] * lx [0] ; \
670 		    w1 [k] -= Z0 [k] * lx [1] ; \
671 		    lx [0] -= G0 [k] * w0 [k] ; \
672 		    lx [1] -= G0 [k] * w1 [k] ;
673 		    FOR_ALL_K
674 		    #undef DO
675 
676 		    Lx [p0++] = lx [0] ;
677 		    Lx [p0++] = lx [1] ;
678 		}
679 		break ;
680 
681 		case 3:
682 		{
683 		    double lx [3], *w0, *w1, *w2 ;
684 		    lx [0] = Lx [p0  ] ;
685 		    lx [1] = Lx [p0+1] ;
686 		    lx [2] = Lx [p0+2] ;
687 		    w0 = W + WDIM * Li [p0  ] ;
688 		    w1 = W + WDIM * Li [p0+1] ;
689 		    w2 = W + WDIM * Li [p0+2] ;
690 
691 		    /* for k = 0 to RANK-1 do: */
692 		    #define DO(k) \
693 		    w0 [k] -= Z0 [k] * lx [0] ; \
694 		    w1 [k] -= Z0 [k] * lx [1] ; \
695 		    w2 [k] -= Z0 [k] * lx [2] ; \
696 		    lx [0] -= G0 [k] * w0 [k] ; \
697 		    lx [1] -= G0 [k] * w1 [k] ; \
698 		    lx [2] -= G0 [k] * w2 [k] ;
699 		    FOR_ALL_K
700 		    #undef DO
701 
702 		    Lx [p0++] = lx [0] ;
703 		    Lx [p0++] = lx [1] ;
704 		    Lx [p0++] = lx [2] ;
705 		}
706 	    }
707 
708 	    for ( ; p0 < pend ; p0 += 4)
709 	    {
710 		double lx [4], *w0, *w1, *w2, *w3 ;
711 		lx [0] = Lx [p0  ] ;
712 		lx [1] = Lx [p0+1] ;
713 		lx [2] = Lx [p0+2] ;
714 		lx [3] = Lx [p0+3] ;
715 		w0 = W + WDIM * Li [p0  ] ;
716 		w1 = W + WDIM * Li [p0+1] ;
717 		w2 = W + WDIM * Li [p0+2] ;
718 		w3 = W + WDIM * Li [p0+3] ;
719 
720 		/* for k = 0 to RANK-1 do: */
721 		#define DO(k) \
722 		w0 [k] -= Z0 [k] * lx [0] ; \
723 		w1 [k] -= Z0 [k] * lx [1] ; \
724 		w2 [k] -= Z0 [k] * lx [2] ; \
725 		w3 [k] -= Z0 [k] * lx [3] ; \
726 		lx [0] -= G0 [k] * w0 [k] ; \
727 		lx [1] -= G0 [k] * w1 [k] ; \
728 		lx [2] -= G0 [k] * w2 [k] ; \
729 		lx [3] -= G0 [k] * w3 [k] ;
730 		FOR_ALL_K
731 		#undef DO
732 
733 		Lx [p0  ] = lx [0] ;
734 		Lx [p0+1] = lx [1] ;
735 		Lx [p0+2] = lx [2] ;
736 		Lx [p0+3] = lx [3] ;
737 	    }
738 	}
739     }
740 #endif
741 }
742 /* prepare this file for another inclusion in t_cholmod_updown.c: */
743 #undef RANK
744