// ============================================================================= // === spqr_front ============================================================== // ============================================================================= /* Given an m-by-n frontal matrix, use Householder reflections to reduce it to upper trapezoidal form. Columns 0:npiv-1 are checked against tol. 0 # x x x x x x 1 # x x x x x x 2 # x x x x x x 3 # x x x x x x 4 - # x x x x x <- Stair [0] = 4 5 # x x x x x 6 # x x x x x 7 # x x x x x 8 - # x x x x <- Stair [1] = 8 9 # x x x x 10 # x x x x 11 # x x x x 12 - # x x x <- Stair [2] = 12 13 # x x x 14 - # x x <- Stair [3] = 14 15 - # x <- Stair [4] = 15 16 - # <- Stair [5] = 16 - <- Stair [6] = 17 Suppose npiv = 3, and no columns fall below tol: 0 R r r r r r r 1 h R r r r r r 2 h h R r r r r 3 h h h C c c c 4 - h h h C c c <- Stair [0] = 4 5 h h h h C c 6 h h h h h C 7 h h h h h h 8 - h h h h h <- Stair [1] = 8 9 h h h h h 10 h h h h h 11 h h h h h 12 - h h h h <- Stair [2] = 12 13 h h h h 14 - h h h <- Stair [3] = 14 15 - h h <- Stair [4] = 15 16 - h <- Stair [5] = 16 - <- Stair [6] = 17 where r is an entry in the R block, c is an entry in the C (contribution) block, and h is an entry in the H block (Householder vectors). Diagonal entries are capitalized. Suppose the 2nd column has a norm <= tol; the result is a "squeezed" R: 0 R r r r r r r <- Stair [1] = 0 to denote a dead pivot column 1 h - R r r r r 2 h - h C c c c 3 h - h h C c c 4 - - h h h C c <- Stair [0] = 4 5 - h h h h C 6 - h h h h h 7 - h h h h h 8 - h h h h h 9 h h h h h 10 h h h h h 11 h h h h h 12 - h h h h <- Stair [2] = 12 13 h h h h 14 - h h h <- Stair [3] = 14 15 - h h <- Stair [4] = 15 16 - h <- Stair [5] = 16 - <- Stair [6] = 17 where "diagonal" entries are capitalized. The 2nd H vector is missing (it is H2 = I, identity). The 2nd column of R is not deleted. The contribution block is always triangular, but the first npiv columns of the R can become "staggered". Columns npiv to n-1 in the R block are always the same length. If columns are found "dead", the staircase may be updated. Stair[k] is set to zero if k is dead. Also, Stair[k] is increased, if necessary, to ensure that R and C reside within the staircase. For example: 0 0 0 0 0 x with npiv = 2 has a Stair = [ 0 1 2 ] on output, to reflect the C block: - C c - - C A tol of zero means that any nonzero norm (however small) is accepted; only exact zero columns are flagged as dead. A negative tol means that the norms are ignored; a column is never flagged dead. The default tol is set elsewhere as 20 * (m+1) * eps * max column 2-norm of A. LAPACK's dlarf* routines are used to construct and apply the Householder reflections. The panel size (block size) is provided as an input parameter, which defines the number of Householder vectors in a panel. However, when the front is small (or when the remaining part of a front is small) the block size is increased to include the entire front. "Small" is defined, below, as fronts with fewer than 5000 entries. NOTE: this function does not check its inputs. If the caller runs out of memory and passes NULL pointers, this function will segfault. */ #include "spqr.hpp" #define SMALL 5000 #define MINCHUNK 4 #define MINCHUNK_RATIO 4 // ============================================================================= // === spqr_private_house ====================================================== // ============================================================================= // Construct a Householder reflection H = I - tau * v * v' such that H*x is // reduced to zero except for the first element. Returns X [0] = the first // entry of H*x, and X [1:n-1] = the Householder vector V [1:n-1], where // V [0] = 1. If X [1:n-1] is zero, then the H=I (tau = 0) is returned, // and V [1:n-1] is all zero. In MATLAB (1-based indexing), ignoring the // rescaling done in dlarfg/zlarfg, this function computes the following: /* function [x, tau] = house (x) n = length (x) ; beta = norm (x) ; if (x (1) > 0) beta = -beta ; end tau = (beta - x (1)) / beta ; x (2:n) = x (2:n) / (x (1) - beta) ; x (1) = beta ; */ // Note that for the complex case, the reflection must be applied as H'*x, // which requires that tau be conjugated in spqr_private_apply1. // // This function performs about 3*n+2 flops inline double spqr_private_larfg (Long n, double *X, cholmod_common *cc) { double tau = 0 ; BLAS_INT N = n, one = 1 ; if (CHECK_BLAS_INT && !EQ (N,n)) { cc->blas_ok = FALSE ; } if (!CHECK_BLAS_INT || cc->blas_ok) { LAPACK_DLARFG (&N, X, X + 1, &one, &tau) ; } return (tau) ; } inline Complex spqr_private_larfg (Long n, Complex *X, cholmod_common *cc) { Complex tau = 0 ; BLAS_INT N = n, one = 1 ; if (CHECK_BLAS_INT && !EQ (N,n)) { cc->blas_ok = FALSE ; } if (!CHECK_BLAS_INT || cc->blas_ok) { LAPACK_ZLARFG (&N, X, X + 1, &one, &tau) ; } return (tau) ; } template Entry spqr_private_house // returns tau ( // inputs, not modified Long n, // input/output Entry *X, // size n cholmod_common *cc ) { return (spqr_private_larfg (n, X, cc)) ; } // ============================================================================= // === spqr_private_apply1 ===================================================== // ============================================================================= // Apply a single Householder reflection; C = C - tau * v * v' * C. The // algorithm used by dlarf is: /* function C = apply1 (C, v, tau) w = C'*v ; C = C - tau*v*w' ; */ // For the complex case, we need to apply H', which requires that tau be // conjugated. // // If applied to a single column, this function performs 2*n-1 flops to // compute w, and 2*n+1 to apply it to C, for a total of 4*n flops. inline void spqr_private_larf (Long m, Long n, double *V, double tau, double *C, Long ldc, double *W, cholmod_common *cc) { BLAS_INT M = m, N = n, LDC = ldc, one = 1 ; char left = 'L' ; if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc))) { cc->blas_ok = FALSE ; } if (!CHECK_BLAS_INT || cc->blas_ok) { LAPACK_DLARF (&left, &M, &N, V, &one, &tau, C, &LDC, W) ; } } inline void spqr_private_larf (Long m, Long n, Complex *V, Complex tau, Complex *C, Long ldc, Complex *W, cholmod_common *cc) { BLAS_INT M = m, N = n, LDC = ldc, one = 1 ; char left = 'L' ; Complex conj_tau = spqr_conj (tau) ; if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc))) { cc->blas_ok = FALSE ; } if (!CHECK_BLAS_INT || cc->blas_ok) { LAPACK_ZLARF (&left, &M, &N, V, &one, &conj_tau, C, &LDC, W) ; } } template void spqr_private_apply1 ( // inputs, not modified Long m, // C is m-by-n Long n, Long ldc, // leading dimension of C Entry *V, // size m, Householder vector V Entry tau, // Householder coefficient // input/output Entry *C, // size m-by-n // workspace, not defined on input or output Entry *W, // size n cholmod_common *cc ) { Entry vsave ; if (m <= 0 || n <= 0) { return ; // nothing to do } vsave = V [0] ; // temporarily restore unit diagonal of V V [0] = 1 ; spqr_private_larf (m, n, V, tau, C, ldc, W, cc) ; V [0] = vsave ; // restore V [0] } // ============================================================================= // === spqr_front ============================================================== // ============================================================================= // Factorize a front F into a sequence of Householder vectors H, and an upper // trapezoidal matrix R. R may be a squeezed upper trapezoidal matrix if any // of the leading npiv columns are linearly dependent. Returns the row index // rank that indicates the first entry in C, which is F (rank,npiv), or 0 // on error. template Long spqr_front ( // input, not modified Long m, // F is m-by-n with leading dimension m Long n, Long npiv, // number of pivot columns double tol, // a column is flagged as dead if its norm is <= tol Long ntol, // apply tol only to first ntol pivot columns Long fchunk, // block size for compact WY Householder reflections, // treated as 1 if fchunk <= 1 // input/output Entry *F, // frontal matrix F of size m-by-n Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero, // for each k = 0:n-1, and remain zero on output. char *Rdead, // size npiv; all zero on input. If k is dead, // Rdead [k] is set to 1 // output, not defined on input Entry *Tau, // size n, Householder coefficients // workspace, undefined on input and output Entry *W, // size b*n, where b = min (fchunk,n,m) // input/output double *wscale, double *wssq, cholmod_common *cc ) { Entry tau ; double wk ; Entry *V ; Long k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk, rank ; // NOTE: inputs are not checked for NULL (except if debugging enabled) ASSERT (F != NULL) ; ASSERT (Stair != NULL) ; ASSERT (Rdead != NULL) ; ASSERT (Tau != NULL) ; ASSERT (W != NULL) ; ASSERT (m >= 0 && n >= 0) ; npiv = MAX (0, npiv) ; // npiv must be between 0 and n npiv = MIN (n, npiv) ; g1 = 0 ; // row index of first queued-up Householder k1 = 0 ; // pending Householders are in F (g1:t, k1:k2-1) k2 = 0 ; V = F ; // Householder vectors start here g = 0 ; // number of good Householders found nv = 0 ; // number of Householder reflections queued up vzeros = 0 ; // number of explicit zeros in queued-up H's t = 0 ; // staircase of current column fchunk = MAX (fchunk, 1) ; minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ; rank = MIN (m,npiv) ; // F (rank,npiv) is the first entry in C. rank // is also the number of rows in the R block, // and the number of good pivot columns found. ntol = MIN (ntol, npiv) ; // Note ntol can be negative, which means to // not use tol at all. PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ; for (k = 0 ; k < n ; k++) { // --------------------------------------------------------------------- // reduce the kth column of F to eliminate all but "diagonal" F (g,k) // --------------------------------------------------------------------- // get the staircase for the kth column, and operate on the "diagonal" // F (g,k); eliminate F (g+1:t-1, k) to zero t0 = t ; // t0 = staircase of column k-1 t = Stair [k] ; // t = staircase of this column k PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ; if (g >= m) { // F (g,k) is outside the matrix, so we're done. If this happens // when k < npiv, then there is no contribution block. PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ; for ( ; k < npiv ; k++) { Rdead [k] = 1 ; Stair [k] = 0 ; // remaining pivot columns all dead Tau [k] = 0 ; } for ( ; k < n ; k++) { Stair [k] = m ; // non-pivotal columns Tau [k] = 0 ; } ASSERT (nv == 0) ; // there can be no pending updates return (rank) ; } // if t < g+1, then this column is all zero; fix staircase so that R is // always inside the staircase. t = MAX (g+1,t) ; Stair [k] = t ; // --------------------------------------------------------------------- // If t just grew a lot since the last t, apply H now to all of F // --------------------------------------------------------------------- // vzeros is the number of zero entries in V after including the next // Householder vector. If it would exceed 50% of the size of V, // apply the pending Householder reflections now, but only if // enough vectors have accumulated. vzeros += nv * (t - t0) ; if (nv >= minchunk) { vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ; if (vzeros > MAX (16, vsize/2)) { // apply pending block of Householder reflections PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t0-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } // --------------------------------------------------------------------- // find a Householder reflection that reduces column k // --------------------------------------------------------------------- tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ; // --------------------------------------------------------------------- // check to see if the kth column is OK // --------------------------------------------------------------------- if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol) { // ----------------------------------------------------------------- // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead // ----------------------------------------------------------------- // keep track of the 2-norm of w, the dead column 2-norms if (wk != 0) { // see also LAPACK's dnrm2 function if ((*wscale) == 0) { // this is the nonzero first entry in w (*wssq) = 1 ; } if ((*wscale) < wk) { double rr = (*wscale) / wk ; (*wssq) = 1 + (*wssq) * rr * rr ; (*wscale) = wk ; } else { double rr = wk / (*wscale) ; (*wssq) += rr * rr ; } } // zero out F (g:m-1,k) and flag it as dead for (i = g ; i < m ; i++) { // This is not strictly necessary. On output, entries outside // the staircase are ignored. F [INDEX (i,k,m)] = 0 ; } Stair [k] = 0 ; Tau [k] = 0 ; Rdead [k] = 1 ; if (nv > 0) { // apply pending block of Householder reflections PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t0-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } else { // ----------------------------------------------------------------- // one more good pivot column found // ----------------------------------------------------------------- Tau [k] = tau ; // save the Householder coefficient if (nv == 0) { // start the queue of pending Householder updates, and define // the current panel as k1:k2-1 g1 = g ; // first row of V k1 = k ; // first column of V k2 = MIN (n, k+fchunk) ; // k2-1 is last col in panel V = &F [INDEX (g1,k1,m)] ; // pending V starts here // check for switch to unblocked code mleft = m-g1 ; // number of rows left nleft = n-k1 ; // number of columns left if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2 || fchunk <= 1) { // remaining matrix is small; switch to unblocked code by // including the rest of the matrix in the panel. Always // use unblocked code if fchunk <= 1. k2 = n ; } } nv++ ; // one more pending update; V is F (g1:t-1, k1:k1+nv-1) // ----------------------------------------------------------------- // keep track of "pure" flops, for performance testing only // ----------------------------------------------------------------- // The Householder vector is of length t-g, including the unit // diagonal, and takes 3*(t-g) flops to compute. It will be // applied as a block, but compute the "pure" flops by assuming // that this single Householder vector is computed and then applied // just by itself to the rest of the frontal matrix (columns // k+1:n-1, or n-k-1 columns). Applying the Householder reflection // to just one column takes 4*(t-g) flops. This computation only // works if TBB is disabled, merely because it uses a global // variable to keep track of the flop count. If TBB is used, this // computation may result in a race condition; it is disabled in // that case. FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ; // ----------------------------------------------------------------- // apply the kth Householder reflection to the current panel // ----------------------------------------------------------------- // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where // v is stored in F (g:t-1,k). This applies just one reflection // to the current panel. PR (("apply 1: k %ld\n", k)) ; spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau, &F [INDEX (g,k+1,m)], W, cc) ; g++ ; // one more pivot found // ----------------------------------------------------------------- // apply the Householder reflections if end of panel reached // ----------------------------------------------------------------- if (k == k2-1 || g == m) // or if last pivot is found { // apply pending block of Householder reflections PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ; spqr_larftb ( 0, // method 0: Left, Transpose t-g1, n-k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1) &Tau [k1], // Tau (k1:k2-1) &F [INDEX (g1,k2,m)], // F (g1:t-1, k2:n-1) W, cc) ; // size nv*nv + nv*(n-k2) nv = 0 ; // clear queued-up Householder reflections vzeros = 0 ; } } // --------------------------------------------------------------------- // determine the rank of the pivot columns // --------------------------------------------------------------------- if (k == npiv-1) { // the rank is the number of good columns found in the first // npiv columns. It is also the number of rows in the R block. // F (rank,npiv) is first entry in the C block. rank = g ; PR (("rank of Front pivcols: %ld\n", rank)) ; } } if (CHECK_BLAS_INT && !cc->blas_ok) { // This cannot occur if the BLAS_INT and the Long are the same integer. // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the // compiler will then remove this as dead code. ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ; return (0) ; } return (rank) ; } // ============================================================================= template Long spqr_front ( // input, not modified Long m, // F is m-by-n with leading dimension m Long n, Long npiv, // number of pivot columns double tol, // a column is flagged as dead if its norm is <= tol Long ntol, // apply tol only to first ntol pivot columns Long fchunk, // block size for compact WY Householder reflections, // treated as 1 if fchunk <= 1 (in which case the // unblocked code is used). // input/output double *F, // frontal matrix F of size m-by-n Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero, // and remain zero on output. char *Rdead, // size npiv; all zero on input. If k is dead, // Rdead [k] is set to 1 // output, not defined on input double *Tau, // size n, Householder coefficients // workspace, undefined on input and output double *W, // size b*n, where b = min (fchunk,n,m) // input/output double *wscale, double *wssq, cholmod_common *cc ) ; // ============================================================================= template Long spqr_front ( // input, not modified Long m, // F is m-by-n with leading dimension m Long n, Long npiv, // number of pivot columns double tol, // a column is flagged as dead if its norm is <= tol Long ntol, // apply tol only to first ntol pivot columns Long fchunk, // block size for compact WY Householder reflections, // treated as 1 if fchunk <= 1 (in which case the // unblocked code is used). // input/output Complex *F, // frontal matrix F of size m-by-n Long *Stair, // size n, entries F (Stair[k]:m-1, k) are all zero, // and remain zero on output. char *Rdead, // size npiv; all zero on input. If k is dead, // Rdead [k] is set to 1 // output, not defined on input Complex *Tau, // size n, Householder coefficients // workspace, undefined on input and output Complex *W, // size b*n, where b = min (fchunk,n,m) // input/output double *wscale, double *wssq, cholmod_common *cc ) ;