1 // =============================================================================
2 // === spqr_front ==============================================================
3 // =============================================================================
4
5 /* Given an m-by-n frontal matrix, use Householder reflections to reduce it
6 to upper trapezoidal form. Columns 0:npiv-1 are checked against tol.
7
8 0 # x x x x x x
9 1 # x x x x x x
10 2 # x x x x x x
11 3 # x x x x x x
12 4 - # x x x x x <- Stair [0] = 4
13 5 # x x x x x
14 6 # x x x x x
15 7 # x x x x x
16 8 - # x x x x <- Stair [1] = 8
17 9 # x x x x
18 10 # x x x x
19 11 # x x x x
20 12 - # x x x <- Stair [2] = 12
21 13 # x x x
22 14 - # x x <- Stair [3] = 14
23 15 - # x <- Stair [4] = 15
24 16 - # <- Stair [5] = 16
25 - <- Stair [6] = 17
26
27 Suppose npiv = 3, and no columns fall below tol:
28
29 0 R r r r r r r
30 1 h R r r r r r
31 2 h h R r r r r
32 3 h h h C c c c
33 4 - h h h C c c <- Stair [0] = 4
34 5 h h h h C c
35 6 h h h h h C
36 7 h h h h h h
37 8 - h h h h h <- Stair [1] = 8
38 9 h h h h h
39 10 h h h h h
40 11 h h h h h
41 12 - h h h h <- Stair [2] = 12
42 13 h h h h
43 14 - h h h <- Stair [3] = 14
44 15 - h h <- Stair [4] = 15
45 16 - h <- Stair [5] = 16
46 - <- Stair [6] = 17
47
48 where r is an entry in the R block, c is an entry in the C (contribution)
49 block, and h is an entry in the H block (Householder vectors). Diagonal
50 entries are capitalized.
51
52 Suppose the 2nd column has a norm <= tol; the result is a "squeezed" R:
53
54 0 R r r r r r r <- Stair [1] = 0 to denote a dead pivot column
55 1 h - R r r r r
56 2 h - h C c c c
57 3 h - h h C c c
58 4 - - h h h C c <- Stair [0] = 4
59 5 - h h h h C
60 6 - h h h h h
61 7 - h h h h h
62 8 - h h h h h
63 9 h h h h h
64 10 h h h h h
65 11 h h h h h
66 12 - h h h h <- Stair [2] = 12
67 13 h h h h
68 14 - h h h <- Stair [3] = 14
69 15 - h h <- Stair [4] = 15
70 16 - h <- Stair [5] = 16
71 - <- Stair [6] = 17
72
73 where "diagonal" entries are capitalized. The 2nd H vector is missing
74 (it is H2 = I, identity). The 2nd column of R is not deleted. The
75 contribution block is always triangular, but the first npiv columns of
76 the R can become "staggered". Columns npiv to n-1 in the R block are
77 always the same length.
78
79 If columns are found "dead", the staircase may be updated. Stair[k] is
80 set to zero if k is dead. Also, Stair[k] is increased, if necessary, to
81 ensure that R and C reside within the staircase. For example:
82
83 0 0 0
84 0 0 x
85
86 with npiv = 2 has a Stair = [ 0 1 2 ] on output, to reflect the C block:
87
88 - C c
89 - - C
90
91 A tol of zero means that any nonzero norm (however small) is accepted;
92 only exact zero columns are flagged as dead. A negative tol means that
93 the norms are ignored; a column is never flagged dead. The default tol
94 is set elsewhere as 20 * (m+1) * eps * max column 2-norm of A.
95
96 LAPACK's dlarf* routines are used to construct and apply the Householder
97 reflections. The panel size (block size) is provided as an input
98 parameter, which defines the number of Householder vectors in a panel.
99 However, when the front is small (or when the remaining part
100 of a front is small) the block size is increased to include the entire
101 front. "Small" is defined, below, as fronts with fewer than 5000 entries.
102
103 NOTE: this function does not check its inputs. If the caller runs out of
104 memory and passes NULL pointers, this function will segfault.
105 */
106
107 #include "spqr.hpp"
108
109 #define SMALL 5000
110 #define MINCHUNK 4
111 #define MINCHUNK_RATIO 4
112
113 // =============================================================================
114 // === spqr_private_house ======================================================
115 // =============================================================================
116
117 // Construct a Householder reflection H = I - tau * v * v' such that H*x is
118 // reduced to zero except for the first element. Returns X [0] = the first
119 // entry of H*x, and X [1:n-1] = the Householder vector V [1:n-1], where
120 // V [0] = 1. If X [1:n-1] is zero, then the H=I (tau = 0) is returned,
121 // and V [1:n-1] is all zero. In MATLAB (1-based indexing), ignoring the
122 // rescaling done in dlarfg/zlarfg, this function computes the following:
123
124 /*
125 function [x, tau] = house (x)
126 n = length (x) ;
127 beta = norm (x) ;
128 if (x (1) > 0)
129 beta = -beta ;
130 end
131 tau = (beta - x (1)) / beta ;
132 x (2:n) = x (2:n) / (x (1) - beta) ;
133 x (1) = beta ;
134 */
135
136 // Note that for the complex case, the reflection must be applied as H'*x,
137 // which requires that tau be conjugated in spqr_private_apply1.
138 //
139 // This function performs about 3*n+2 flops
140
spqr_private_larfg(Long n,double * X,cholmod_common * cc)141 inline double spqr_private_larfg (Long n, double *X, cholmod_common *cc)
142 {
143 double tau = 0 ;
144 BLAS_INT N = n, one = 1 ;
145 if (CHECK_BLAS_INT && !EQ (N,n))
146 {
147 cc->blas_ok = FALSE ;
148 }
149 if (!CHECK_BLAS_INT || cc->blas_ok)
150 {
151 LAPACK_DLARFG (&N, X, X + 1, &one, &tau) ;
152 }
153 return (tau) ;
154 }
155
156
spqr_private_larfg(Long n,Complex * X,cholmod_common * cc)157 inline Complex spqr_private_larfg (Long n, Complex *X, cholmod_common *cc)
158 {
159 Complex tau = 0 ;
160 BLAS_INT N = n, one = 1 ;
161 if (CHECK_BLAS_INT && !EQ (N,n))
162 {
163 cc->blas_ok = FALSE ;
164 }
165 if (!CHECK_BLAS_INT || cc->blas_ok)
166 {
167 LAPACK_ZLARFG (&N, X, X + 1, &one, &tau) ;
168 }
169 return (tau) ;
170 }
171
172
spqr_private_house(Long n,Entry * X,cholmod_common * cc)173 template <typename Entry> Entry spqr_private_house // returns tau
174 (
175 // inputs, not modified
176 Long n,
177
178 // input/output
179 Entry *X, // size n
180
181 cholmod_common *cc
182 )
183 {
184 return (spqr_private_larfg (n, X, cc)) ;
185 }
186
187
188 // =============================================================================
189 // === spqr_private_apply1 =====================================================
190 // =============================================================================
191
192 // Apply a single Householder reflection; C = C - tau * v * v' * C. The
193 // algorithm used by dlarf is:
194
195 /*
196 function C = apply1 (C, v, tau)
197 w = C'*v ;
198 C = C - tau*v*w' ;
199 */
200
201 // For the complex case, we need to apply H', which requires that tau be
202 // conjugated.
203 //
204 // If applied to a single column, this function performs 2*n-1 flops to
205 // compute w, and 2*n+1 to apply it to C, for a total of 4*n flops.
206
spqr_private_larf(Long m,Long n,double * V,double tau,double * C,Long ldc,double * W,cholmod_common * cc)207 inline void spqr_private_larf (Long m, Long n, double *V, double tau,
208 double *C, Long ldc, double *W, cholmod_common *cc)
209 {
210 BLAS_INT M = m, N = n, LDC = ldc, one = 1 ;
211 char left = 'L' ;
212 if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc)))
213 {
214 cc->blas_ok = FALSE ;
215
216 }
217 if (!CHECK_BLAS_INT || cc->blas_ok)
218 {
219 LAPACK_DLARF (&left, &M, &N, V, &one, &tau, C, &LDC, W) ;
220 }
221 }
222
spqr_private_larf(Long m,Long n,Complex * V,Complex tau,Complex * C,Long ldc,Complex * W,cholmod_common * cc)223 inline void spqr_private_larf (Long m, Long n, Complex *V, Complex tau,
224 Complex *C, Long ldc, Complex *W, cholmod_common *cc)
225 {
226 BLAS_INT M = m, N = n, LDC = ldc, one = 1 ;
227 char left = 'L' ;
228 Complex conj_tau = spqr_conj (tau) ;
229 if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc)))
230 {
231 cc->blas_ok = FALSE ;
232 }
233 if (!CHECK_BLAS_INT || cc->blas_ok)
234 {
235 LAPACK_ZLARF (&left, &M, &N, V, &one, &conj_tau, C, &LDC, W) ;
236 }
237 }
238
239
spqr_private_apply1(Long m,Long n,Long ldc,Entry * V,Entry tau,Entry * C,Entry * W,cholmod_common * cc)240 template <typename Entry> void spqr_private_apply1
241 (
242 // inputs, not modified
243 Long m, // C is m-by-n
244 Long n,
245 Long ldc, // leading dimension of C
246 Entry *V, // size m, Householder vector V
247 Entry tau, // Householder coefficient
248
249 // input/output
250 Entry *C, // size m-by-n
251
252 // workspace, not defined on input or output
253 Entry *W, // size n
254
255 cholmod_common *cc
256 )
257 {
258 Entry vsave ;
259 if (m <= 0 || n <= 0)
260 {
261 return ; // nothing to do
262 }
263 vsave = V [0] ; // temporarily restore unit diagonal of V
264 V [0] = 1 ;
265 spqr_private_larf (m, n, V, tau, C, ldc, W, cc) ;
266 V [0] = vsave ; // restore V [0]
267 }
268
269
270 // =============================================================================
271 // === spqr_front ==============================================================
272 // =============================================================================
273
274 // Factorize a front F into a sequence of Householder vectors H, and an upper
275 // trapezoidal matrix R. R may be a squeezed upper trapezoidal matrix if any
276 // of the leading npiv columns are linearly dependent. Returns the row index
277 // rank that indicates the first entry in C, which is F (rank,npiv), or 0
278 // on error.
279
spqr_front(Long m,Long n,Long npiv,double tol,Long ntol,Long fchunk,Entry * F,Long * Stair,char * Rdead,Entry * Tau,Entry * W,double * wscale,double * wssq,cholmod_common * cc)280 template <typename Entry> Long spqr_front
281 (
282 // input, not modified
283 Long m, // F is m-by-n with leading dimension m
284 Long n,
285 Long npiv, // number of pivot columns
286 double tol, // a column is flagged as dead if its norm is <= tol
287 Long ntol, // apply tol only to first ntol pivot columns
288 Long fchunk, // block size for compact WY Householder reflections,
289 // treated as 1 if fchunk <= 1
290
291 // input/output
292 Entry *F, // frontal matrix F of size m-by-n
293 Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero,
294 // for each k = 0:n-1, and remain zero on output.
295 char *Rdead, // size npiv; all zero on input. If k is dead,
296 // Rdead [k] is set to 1
297
298 // output, not defined on input
299 Entry *Tau, // size n, Householder coefficients
300
301 // workspace, undefined on input and output
302 Entry *W, // size b*n, where b = min (fchunk,n,m)
303
304 // input/output
305 double *wscale,
306 double *wssq,
307
308 cholmod_common *cc
309 )
310 {
311 Entry tau ;
312 double wk ;
313 Entry *V ;
314 Long k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk,
315 rank ;
316
317 // NOTE: inputs are not checked for NULL (except if debugging enabled)
318
319 ASSERT (F != NULL) ;
320 ASSERT (Stair != NULL) ;
321 ASSERT (Rdead != NULL) ;
322 ASSERT (Tau != NULL) ;
323 ASSERT (W != NULL) ;
324 ASSERT (m >= 0 && n >= 0) ;
325
326 npiv = MAX (0, npiv) ; // npiv must be between 0 and n
327 npiv = MIN (n, npiv) ;
328
329 g1 = 0 ; // row index of first queued-up Householder
330 k1 = 0 ; // pending Householders are in F (g1:t, k1:k2-1)
331 k2 = 0 ;
332 V = F ; // Householder vectors start here
333 g = 0 ; // number of good Householders found
334 nv = 0 ; // number of Householder reflections queued up
335 vzeros = 0 ; // number of explicit zeros in queued-up H's
336 t = 0 ; // staircase of current column
337 fchunk = MAX (fchunk, 1) ;
338 minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ;
339 rank = MIN (m,npiv) ; // F (rank,npiv) is the first entry in C. rank
340 // is also the number of rows in the R block,
341 // and the number of good pivot columns found.
342
343 ntol = MIN (ntol, npiv) ; // Note ntol can be negative, which means to
344 // not use tol at all.
345
346 PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ;
347 for (k = 0 ; k < n ; k++)
348 {
349
350 // ---------------------------------------------------------------------
351 // reduce the kth column of F to eliminate all but "diagonal" F (g,k)
352 // ---------------------------------------------------------------------
353
354 // get the staircase for the kth column, and operate on the "diagonal"
355 // F (g,k); eliminate F (g+1:t-1, k) to zero
356 t0 = t ; // t0 = staircase of column k-1
357 t = Stair [k] ; // t = staircase of this column k
358
359 PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ;
360 if (g >= m)
361 {
362 // F (g,k) is outside the matrix, so we're done. If this happens
363 // when k < npiv, then there is no contribution block.
364 PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ;
365 for ( ; k < npiv ; k++)
366 {
367 Rdead [k] = 1 ;
368 Stair [k] = 0 ; // remaining pivot columns all dead
369 Tau [k] = 0 ;
370 }
371 for ( ; k < n ; k++)
372 {
373 Stair [k] = m ; // non-pivotal columns
374 Tau [k] = 0 ;
375 }
376 ASSERT (nv == 0) ; // there can be no pending updates
377 return (rank) ;
378 }
379
380 // if t < g+1, then this column is all zero; fix staircase so that R is
381 // always inside the staircase.
382 t = MAX (g+1,t) ;
383 Stair [k] = t ;
384
385 // ---------------------------------------------------------------------
386 // If t just grew a lot since the last t, apply H now to all of F
387 // ---------------------------------------------------------------------
388
389 // vzeros is the number of zero entries in V after including the next
390 // Householder vector. If it would exceed 50% of the size of V,
391 // apply the pending Householder reflections now, but only if
392 // enough vectors have accumulated.
393
394 vzeros += nv * (t - t0) ;
395 if (nv >= minchunk)
396 {
397 vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ;
398 if (vzeros > MAX (16, vsize/2))
399 {
400 // apply pending block of Householder reflections
401 PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ;
402 spqr_larftb (
403 0, // method 0: Left, Transpose
404 t0-g1, n-k2, nv, m, m,
405 V, // F (g1:t-1, k1:k1+nv-1)
406 &Tau [k1], // Tau (k1:k-1)
407 &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1)
408 W, cc) ; // size nv*nv + nv*(n-k2)
409 nv = 0 ; // clear queued-up Householder reflections
410 vzeros = 0 ;
411 }
412 }
413
414 // ---------------------------------------------------------------------
415 // find a Householder reflection that reduces column k
416 // ---------------------------------------------------------------------
417
418 tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ;
419
420 // ---------------------------------------------------------------------
421 // check to see if the kth column is OK
422 // ---------------------------------------------------------------------
423
424 if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol)
425 {
426
427 // -----------------------------------------------------------------
428 // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead
429 // -----------------------------------------------------------------
430
431 // keep track of the 2-norm of w, the dead column 2-norms
432 if (wk != 0)
433 {
434 // see also LAPACK's dnrm2 function
435 if ((*wscale) == 0)
436 {
437 // this is the nonzero first entry in w
438 (*wssq) = 1 ;
439 }
440 if ((*wscale) < wk)
441 {
442 double rr = (*wscale) / wk ;
443 (*wssq) = 1 + (*wssq) * rr * rr ;
444 (*wscale) = wk ;
445 }
446 else
447 {
448 double rr = wk / (*wscale) ;
449 (*wssq) += rr * rr ;
450 }
451 }
452
453 // zero out F (g:m-1,k) and flag it as dead
454 for (i = g ; i < m ; i++)
455 {
456 // This is not strictly necessary. On output, entries outside
457 // the staircase are ignored.
458 F [INDEX (i,k,m)] = 0 ;
459 }
460 Stair [k] = 0 ;
461 Tau [k] = 0 ;
462 Rdead [k] = 1 ;
463
464 if (nv > 0)
465 {
466 // apply pending block of Householder reflections
467 PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ;
468 spqr_larftb (
469 0, // method 0: Left, Transpose
470 t0-g1, n-k2, nv, m, m,
471 V, // F (g1:t-1, k1:k1+nv-1)
472 &Tau [k1], // Tau (k1:k-1)
473 &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1)
474 W, cc) ; // size nv*nv + nv*(n-k2)
475 nv = 0 ; // clear queued-up Householder reflections
476 vzeros = 0 ;
477 }
478
479 }
480 else
481 {
482
483 // -----------------------------------------------------------------
484 // one more good pivot column found
485 // -----------------------------------------------------------------
486
487 Tau [k] = tau ; // save the Householder coefficient
488 if (nv == 0)
489 {
490 // start the queue of pending Householder updates, and define
491 // the current panel as k1:k2-1
492 g1 = g ; // first row of V
493 k1 = k ; // first column of V
494 k2 = MIN (n, k+fchunk) ; // k2-1 is last col in panel
495 V = &F [INDEX (g1,k1,m)] ; // pending V starts here
496
497 // check for switch to unblocked code
498 mleft = m-g1 ; // number of rows left
499 nleft = n-k1 ; // number of columns left
500 if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2
501 || fchunk <= 1)
502 {
503 // remaining matrix is small; switch to unblocked code by
504 // including the rest of the matrix in the panel. Always
505 // use unblocked code if fchunk <= 1.
506 k2 = n ;
507 }
508 }
509 nv++ ; // one more pending update; V is F (g1:t-1, k1:k1+nv-1)
510
511 // -----------------------------------------------------------------
512 // keep track of "pure" flops, for performance testing only
513 // -----------------------------------------------------------------
514
515 // The Householder vector is of length t-g, including the unit
516 // diagonal, and takes 3*(t-g) flops to compute. It will be
517 // applied as a block, but compute the "pure" flops by assuming
518 // that this single Householder vector is computed and then applied
519 // just by itself to the rest of the frontal matrix (columns
520 // k+1:n-1, or n-k-1 columns). Applying the Householder reflection
521 // to just one column takes 4*(t-g) flops. This computation only
522 // works if TBB is disabled, merely because it uses a global
523 // variable to keep track of the flop count. If TBB is used, this
524 // computation may result in a race condition; it is disabled in
525 // that case.
526
527 FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ;
528
529 // -----------------------------------------------------------------
530 // apply the kth Householder reflection to the current panel
531 // -----------------------------------------------------------------
532
533 // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where
534 // v is stored in F (g:t-1,k). This applies just one reflection
535 // to the current panel.
536 PR (("apply 1: k %ld\n", k)) ;
537 spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau,
538 &F [INDEX (g,k+1,m)], W, cc) ;
539
540 g++ ; // one more pivot found
541
542 // -----------------------------------------------------------------
543 // apply the Householder reflections if end of panel reached
544 // -----------------------------------------------------------------
545
546 if (k == k2-1 || g == m) // or if last pivot is found
547 {
548 // apply pending block of Householder reflections
549 PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ;
550 spqr_larftb (
551 0, // method 0: Left, Transpose
552 t-g1, n-k2, nv, m, m,
553 V, // F (g1:t-1, k1:k1+nv-1)
554 &Tau [k1], // Tau (k1:k2-1)
555 &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1)
556 W, cc) ; // size nv*nv + nv*(n-k2)
557 nv = 0 ; // clear queued-up Householder reflections
558 vzeros = 0 ;
559 }
560 }
561
562 // ---------------------------------------------------------------------
563 // determine the rank of the pivot columns
564 // ---------------------------------------------------------------------
565
566 if (k == npiv-1)
567 {
568 // the rank is the number of good columns found in the first
569 // npiv columns. It is also the number of rows in the R block.
570 // F (rank,npiv) is first entry in the C block.
571 rank = g ;
572 PR (("rank of Front pivcols: %ld\n", rank)) ;
573 }
574 }
575
576 if (CHECK_BLAS_INT && !cc->blas_ok)
577 {
578 // This cannot occur if the BLAS_INT and the Long are the same integer.
579 // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the
580 // compiler will then remove this as dead code.
581 ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ;
582 return (0) ;
583 }
584
585 return (rank) ;
586 }
587
588
589 // =============================================================================
590
591 template Long spqr_front <double>
592 (
593 // input, not modified
594 Long m, // F is m-by-n with leading dimension m
595 Long n,
596 Long npiv, // number of pivot columns
597 double tol, // a column is flagged as dead if its norm is <= tol
598 Long ntol, // apply tol only to first ntol pivot columns
599 Long fchunk, // block size for compact WY Householder reflections,
600 // treated as 1 if fchunk <= 1 (in which case the
601 // unblocked code is used).
602
603 // input/output
604 double *F, // frontal matrix F of size m-by-n
605 Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero,
606 // and remain zero on output.
607 char *Rdead, // size npiv; all zero on input. If k is dead,
608 // Rdead [k] is set to 1
609
610 // output, not defined on input
611 double *Tau, // size n, Householder coefficients
612
613 // workspace, undefined on input and output
614 double *W, // size b*n, where b = min (fchunk,n,m)
615
616 // input/output
617 double *wscale,
618 double *wssq,
619
620 cholmod_common *cc
621 ) ;
622
623 // =============================================================================
624
625 template Long spqr_front <Complex>
626 (
627 // input, not modified
628 Long m, // F is m-by-n with leading dimension m
629 Long n,
630 Long npiv, // number of pivot columns
631 double tol, // a column is flagged as dead if its norm is <= tol
632 Long ntol, // apply tol only to first ntol pivot columns
633 Long fchunk, // block size for compact WY Householder reflections,
634 // treated as 1 if fchunk <= 1 (in which case the
635 // unblocked code is used).
636
637 // input/output
638 Complex *F, // frontal matrix F of size m-by-n
639 Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero,
640 // and remain zero on output.
641 char *Rdead, // size npiv; all zero on input. If k is dead,
642 // Rdead [k] is set to 1
643
644 // output, not defined on input
645 Complex *Tau, // size n, Householder coefficients
646
647 // workspace, undefined on input and output
648 Complex *W, // size b*n, where b = min (fchunk,n,m)
649
650 // input/output
651 double *wscale,
652 double *wssq,
653
654 cholmod_common *cc
655 ) ;
656