1// Copyright ©2015 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package gonum
6
7import (
8	"gonum.org/v1/gonum/blas"
9	"gonum.org/v1/gonum/blas/blas64"
10)
11
12// Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by
13// an orthogonal transformation:
14//  Q^T * A * P = B.
15// The diagonal elements of B are stored in d and the off-diagonal elements are stored
16// in e. These are additionally stored along the diagonal of A and the off-diagonal
17// of A. If m >= n B is an upper-bidiagonal matrix, and if m < n B is a
18// lower-bidiagonal matrix.
19//
20// The remaining elements of A store the data needed to construct Q and P.
21// The matrices Q and P are products of elementary reflectors
22//  if m >= n, Q = H_0 * H_1 * ... * H_{n-1},
23//             P = G_0 * G_1 * ... * G_{n-2},
24//  if m < n,  Q = H_0 * H_1 * ... * H_{m-2},
25//             P = G_0 * G_1 * ... * G_{m-1},
26// where
27//  H_i = I - tauQ[i] * v_i * v_i^T,
28//  G_i = I - tauP[i] * u_i * u_i^T.
29//
30// As an example, on exit the entries of A when m = 6, and n = 5
31//  [ d   e  u1  u1  u1]
32//  [v1   d   e  u2  u2]
33//  [v1  v2   d   e  u3]
34//  [v1  v2  v3   d   e]
35//  [v1  v2  v3  v4   d]
36//  [v1  v2  v3  v4  v5]
37// and when m = 5, n = 6
38//  [ d  u1  u1  u1  u1  u1]
39//  [ e   d  u2  u2  u2  u2]
40//  [v1   e   d  u3  u3  u3]
41//  [v1  v2   e   d  u4  u4]
42//  [v1  v2  v3   e   d  u5]
43//
44// d, tauQ, and tauP must all have length at least min(m,n), and e must have
45// length min(m,n) - 1, unless lwork is -1 when there is no check except for
46// work which must have a length of at least one.
47//
48// work is temporary storage, and lwork specifies the usable memory length.
49// At minimum, lwork >= max(1,m,n) or be -1 and this function will panic otherwise.
50// Dgebrd is blocked decomposition, but the block size is limited
51// by the temporary space available. If lwork == -1, instead of performing Dgebrd,
52// the optimal work length will be stored into work[0].
53//
54// Dgebrd is an internal routine. It is exported for testing purposes.
55func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64, lwork int) {
56	switch {
57	case m < 0:
58		panic(mLT0)
59	case n < 0:
60		panic(nLT0)
61	case lda < max(1, n):
62		panic(badLdA)
63	case lwork < max(1, max(m, n)) && lwork != -1:
64		panic(badLWork)
65	case len(work) < max(1, lwork):
66		panic(shortWork)
67	}
68
69	// Quick return if possible.
70	minmn := min(m, n)
71	if minmn == 0 {
72		work[0] = 1
73		return
74	}
75
76	nb := impl.Ilaenv(1, "DGEBRD", " ", m, n, -1, -1)
77	lwkopt := (m + n) * nb
78	if lwork == -1 {
79		work[0] = float64(lwkopt)
80		return
81	}
82
83	switch {
84	case len(a) < (m-1)*lda+n:
85		panic(shortA)
86	case len(d) < minmn:
87		panic(shortD)
88	case len(e) < minmn-1:
89		panic(shortE)
90	case len(tauQ) < minmn:
91		panic(shortTauQ)
92	case len(tauP) < minmn:
93		panic(shortTauP)
94	}
95
96	nx := minmn
97	ws := max(m, n)
98	if 1 < nb && nb < minmn {
99		// At least one blocked operation can be done.
100		// Get the crossover point nx.
101		nx = max(nb, impl.Ilaenv(3, "DGEBRD", " ", m, n, -1, -1))
102		// Determine when to switch from blocked to unblocked code.
103		if nx < minmn {
104			// At least one blocked operation will be done.
105			ws = (m + n) * nb
106			if lwork < ws {
107				// Not enough work space for the optimal nb,
108				// consider using a smaller block size.
109				nbmin := impl.Ilaenv(2, "DGEBRD", " ", m, n, -1, -1)
110				if lwork >= (m+n)*nbmin {
111					// Enough work space for minimum block size.
112					nb = lwork / (m + n)
113				} else {
114					nb = minmn
115					nx = minmn
116				}
117			}
118		}
119	}
120	bi := blas64.Implementation()
121	ldworkx := nb
122	ldworky := nb
123	var i int
124	for i = 0; i < minmn-nx; i += nb {
125		// Reduce rows and columns i:i+nb to bidiagonal form and return
126		// the matrices X and Y which are needed to update the unreduced
127		// part of the matrix.
128		// X is stored in the first m rows of work, y in the next rows.
129		x := work[:m*ldworkx]
130		y := work[m*ldworkx:]
131		impl.Dlabrd(m-i, n-i, nb, a[i*lda+i:], lda,
132			d[i:], e[i:], tauQ[i:], tauP[i:],
133			x, ldworkx, y, ldworky)
134
135		// Update the trailing submatrix A[i+nb:m,i+nb:n], using an update
136		// of the form  A := A - V*Y**T - X*U**T
137		bi.Dgemm(blas.NoTrans, blas.Trans, m-i-nb, n-i-nb, nb,
138			-1, a[(i+nb)*lda+i:], lda, y[nb*ldworky:], ldworky,
139			1, a[(i+nb)*lda+i+nb:], lda)
140
141		bi.Dgemm(blas.NoTrans, blas.NoTrans, m-i-nb, n-i-nb, nb,
142			-1, x[nb*ldworkx:], ldworkx, a[i*lda+i+nb:], lda,
143			1, a[(i+nb)*lda+i+nb:], lda)
144
145		// Copy diagonal and off-diagonal elements of B back into A.
146		if m >= n {
147			for j := i; j < i+nb; j++ {
148				a[j*lda+j] = d[j]
149				a[j*lda+j+1] = e[j]
150			}
151		} else {
152			for j := i; j < i+nb; j++ {
153				a[j*lda+j] = d[j]
154				a[(j+1)*lda+j] = e[j]
155			}
156		}
157	}
158	// Use unblocked code to reduce the remainder of the matrix.
159	impl.Dgebd2(m-i, n-i, a[i*lda+i:], lda, d[i:], e[i:], tauQ[i:], tauP[i:], work)
160	work[0] = float64(ws)
161}
162