1// Copyright ©2016 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package gonum
6
7import (
8	"gonum.org/v1/gonum/blas"
9	"gonum.org/v1/gonum/blas/blas64"
10	"gonum.org/v1/gonum/lapack"
11)
12
13// Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
14// form H by an orthogonal similarity transformation Q^T * A * Q = H.
15//
16// The matrix Q is represented as a product of (ihi-ilo) elementary
17// reflectors
18//  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
19// Each H_i has the form
20//  H_i = I - tau[i] * v * v^T
21// where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
22// v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
23//
24// On entry, a contains the n×n general matrix to be reduced. On return, the
25// upper triangle and the first subdiagonal of A will be overwritten with the
26// upper Hessenberg matrix H, and the elements below the first subdiagonal, with
27// the slice tau, represent the orthogonal matrix Q as a product of elementary
28// reflectors.
29//
30// The contents of a are illustrated by the following example, with n = 7, ilo =
31// 1 and ihi = 5.
32// On entry,
33//  [ a   a   a   a   a   a   a ]
34//  [     a   a   a   a   a   a ]
35//  [     a   a   a   a   a   a ]
36//  [     a   a   a   a   a   a ]
37//  [     a   a   a   a   a   a ]
38//  [     a   a   a   a   a   a ]
39//  [                         a ]
40// on return,
41//  [ a   a   h   h   h   h   a ]
42//  [     a   h   h   h   h   a ]
43//  [     h   h   h   h   h   h ]
44//  [     v1  h   h   h   h   h ]
45//  [     v1  v2  h   h   h   h ]
46//  [     v1  v2  v3  h   h   h ]
47//  [                         a ]
48// where a denotes an element of the original matrix A, h denotes a
49// modified element of the upper Hessenberg matrix H, and vi denotes an
50// element of the vector defining H_i.
51//
52// ilo and ihi determine the block of A that will be reduced to upper Hessenberg
53// form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
54// -1 if n == 0, otherwise Dgehrd will panic.
55//
56// On return, tau will contain the scalar factors of the elementary reflectors.
57// Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
58// equal to n-1 if n > 0, otherwise Dgehrd will panic.
59//
60// work must have length at least lwork and lwork must be at least max(1,n),
61// otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
62// lwork.
63//
64// If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
65// will be stored in work[0].
66//
67// Dgehrd is an internal routine. It is exported for testing purposes.
68func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
69	switch {
70	case n < 0:
71		panic(nLT0)
72	case ilo < 0 || max(0, n-1) < ilo:
73		panic(badIlo)
74	case ihi < min(ilo, n-1) || n <= ihi:
75		panic(badIhi)
76	case lda < max(1, n):
77		panic(badLdA)
78	case lwork < max(1, n) && lwork != -1:
79		panic(badLWork)
80	case len(work) < lwork:
81		panic(shortWork)
82	}
83
84	// Quick return if possible.
85	if n == 0 {
86		work[0] = 1
87		return
88	}
89
90	const (
91		nbmax = 64
92		ldt   = nbmax + 1
93		tsize = ldt * nbmax
94	)
95	// Compute the workspace requirements.
96	nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
97	lwkopt := n*nb + tsize
98	if lwork == -1 {
99		work[0] = float64(lwkopt)
100		return
101	}
102
103	if len(a) < (n-1)*lda+n {
104		panic(shortA)
105	}
106	if len(tau) != n-1 {
107		panic(badLenTau)
108	}
109
110	// Set tau[:ilo] and tau[ihi:] to zero.
111	for i := 0; i < ilo; i++ {
112		tau[i] = 0
113	}
114	for i := ihi; i < n-1; i++ {
115		tau[i] = 0
116	}
117
118	// Quick return if possible.
119	nh := ihi - ilo + 1
120	if nh <= 1 {
121		work[0] = 1
122		return
123	}
124
125	// Determine the block size.
126	nbmin := 2
127	var nx int
128	if 1 < nb && nb < nh {
129		// Determine when to cross over from blocked to unblocked code
130		// (last block is always handled by unblocked code).
131		nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
132		if nx < nh {
133			// Determine if workspace is large enough for blocked code.
134			if lwork < n*nb+tsize {
135				// Not enough workspace to use optimal nb:
136				// determine the minimum value of nb, and reduce
137				// nb or force use of unblocked code.
138				nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
139				if lwork >= n*nbmin+tsize {
140					nb = (lwork - tsize) / n
141				} else {
142					nb = 1
143				}
144			}
145		}
146	}
147	ldwork := nb // work is used as an n×nb matrix.
148
149	var i int
150	if nb < nbmin || nh <= nb {
151		// Use unblocked code below.
152		i = ilo
153	} else {
154		// Use blocked code.
155		bi := blas64.Implementation()
156		iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
157		for i = ilo; i < ihi-nx; i += nb {
158			ib := min(nb, ihi-i)
159
160			// Reduce columns [i:i+ib] to Hessenberg form, returning the
161			// matrices V and T of the block reflector H = I - V*T*V^T
162			// which performs the reduction, and also the matrix Y = A*V*T.
163			impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
164
165			// Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
166			// right, computing  A := A - Y * V^T. V[i+ib,i+ib-1] must be set
167			// to 1.
168			ei := a[(i+ib)*lda+i+ib-1]
169			a[(i+ib)*lda+i+ib-1] = 1
170			bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
171				-1, work, ldwork,
172				a[(i+ib)*lda+i:], lda,
173				1, a[i+ib:], lda)
174			a[(i+ib)*lda+i+ib-1] = ei
175
176			// Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
177			// right.
178			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
179				1, a[(i+1)*lda+i:], lda, work, ldwork)
180			for j := 0; j <= ib-2; j++ {
181				bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
182			}
183
184			// Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
185			// left.
186			impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
187				ihi-i, n-i-ib, ib,
188				a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
189		}
190	}
191	// Use unblocked code to reduce the rest of the matrix.
192	impl.Dgehd2(n, i, ihi, a, lda, tau, work)
193	work[0] = float64(lwkopt)
194}
195