1// Copyright ©2017 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package gonum
6
7import (
8	"math"
9
10	"gonum.org/v1/gonum/blas"
11	"gonum.org/v1/gonum/blas/blas64"
12	"gonum.org/v1/gonum/lapack"
13)
14
15// Dtgsja computes the generalized singular value decomposition (GSVD)
16// of two real upper triangular or trapezoidal matrices A and B.
17//
18// A and B have the following forms, which may be obtained by the
19// preprocessing subroutine Dggsvp from a general m×n matrix A and p×n
20// matrix B:
21//
22//            n-k-l  k    l
23//  A =    k [  0   A12  A13 ] if m-k-l >= 0;
24//         l [  0    0   A23 ]
25//     m-k-l [  0    0    0  ]
26//
27//            n-k-l  k    l
28//  A =    k [  0   A12  A13 ] if m-k-l < 0;
29//       m-k [  0    0   A23 ]
30//
31//            n-k-l  k    l
32//  B =    l [  0    0   B13 ]
33//       p-l [  0    0    0  ]
34//
35// where the k×k matrix A12 and l×l matrix B13 are non-singular
36// upper triangular. A23 is l×l upper triangular if m-k-l >= 0,
37// otherwise A23 is (m-k)×l upper trapezoidal.
38//
39// On exit,
40//
41//  Uᵀ*A*Q = D1*[ 0 R ], Vᵀ*B*Q = D2*[ 0 R ],
42//
43// where U, V and Q are orthogonal matrices.
44// R is a non-singular upper triangular matrix, and D1 and D2 are
45// diagonal matrices, which are of the following structures:
46//
47// If m-k-l >= 0,
48//
49//                    k  l
50//       D1 =     k [ I  0 ]
51//                l [ 0  C ]
52//            m-k-l [ 0  0 ]
53//
54//                  k  l
55//       D2 = l   [ 0  S ]
56//            p-l [ 0  0 ]
57//
58//               n-k-l  k    l
59//  [ 0 R ] = k [  0   R11  R12 ] k
60//            l [  0    0   R22 ] l
61//
62// where
63//
64//  C = diag( alpha_k, ... , alpha_{k+l} ),
65//  S = diag( beta_k,  ... , beta_{k+l} ),
66//  C^2 + S^2 = I.
67//
68// R is stored in
69//  A[0:k+l, n-k-l:n]
70// on exit.
71//
72// If m-k-l < 0,
73//
74//                 k m-k k+l-m
75//      D1 =   k [ I  0    0  ]
76//           m-k [ 0  C    0  ]
77//
78//                   k m-k k+l-m
79//      D2 =   m-k [ 0  S    0  ]
80//           k+l-m [ 0  0    I  ]
81//             p-l [ 0  0    0  ]
82//
83//                 n-k-l  k   m-k  k+l-m
84//  [ 0 R ] =    k [ 0    R11  R12  R13 ]
85//             m-k [ 0     0   R22  R23 ]
86//           k+l-m [ 0     0    0   R33 ]
87//
88// where
89//  C = diag( alpha_k, ... , alpha_m ),
90//  S = diag( beta_k,  ... , beta_m ),
91//  C^2 + S^2 = I.
92//
93//  R = [ R11 R12 R13 ] is stored in A[0:m, n-k-l:n]
94//      [  0  R22 R23 ]
95// and R33 is stored in
96//  B[m-k:l, n+m-k-l:n] on exit.
97//
98// The computation of the orthogonal transformation matrices U, V or Q
99// is optional. These matrices may either be formed explicitly, or they
100// may be post-multiplied into input matrices U1, V1, or Q1.
101//
102// Dtgsja essentially uses a variant of Kogbetliantz algorithm to reduce
103// min(l,m-k)×l triangular or trapezoidal matrix A23 and l×l
104// matrix B13 to the form:
105//
106//  U1ᵀ*A13*Q1 = C1*R1; V1ᵀ*B13*Q1 = S1*R1,
107//
108// where U1, V1 and Q1 are orthogonal matrices. C1 and S1 are diagonal
109// matrices satisfying
110//
111//  C1^2 + S1^2 = I,
112//
113// and R1 is an l×l non-singular upper triangular matrix.
114//
115// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
116// is as follows
117//  jobU == lapack.GSVDU        Compute orthogonal matrix U
118//  jobU == lapack.GSVDUnit     Use unit-initialized matrix
119//  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
120// The behavior is the same for jobV and jobQ with the exception that instead of
121// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
122// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
123// relevant job parameter is lapack.GSVDNone.
124//
125// k and l specify the sub-blocks in the input matrices A and B:
126//  A23 = A[k:min(k+l,m), n-l:n) and B13 = B[0:l, n-l:n]
127// of A and B, whose GSVD is going to be computed by Dtgsja.
128//
129// tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz
130// iteration procedure. Generally, they are the same as used in the preprocessing
131// step, for example,
132//  tola = max(m, n)*norm(A)*eps,
133//  tolb = max(p, n)*norm(B)*eps,
134// where eps is the machine epsilon.
135//
136// work must have length at least 2*n, otherwise Dtgsja will panic.
137//
138// alpha and beta must have length n or Dtgsja will panic. On exit, alpha and
139// beta contain the generalized singular value pairs of A and B
140//   alpha[0:k] = 1,
141//   beta[0:k]  = 0,
142// if m-k-l >= 0,
143//   alpha[k:k+l] = diag(C),
144//   beta[k:k+l]  = diag(S),
145// if m-k-l < 0,
146//   alpha[k:m]= C, alpha[m:k+l]= 0
147//   beta[k:m] = S, beta[m:k+l] = 1.
148// if k+l < n,
149//   alpha[k+l:n] = 0 and
150//   beta[k+l:n]  = 0.
151//
152// On exit, A[n-k:n, 0:min(k+l,m)] contains the triangular matrix R or part of R
153// and if necessary, B[m-k:l, n+m-k-l:n] contains a part of R.
154//
155// Dtgsja returns whether the routine converged and the number of iteration cycles
156// that were run.
157//
158// Dtgsja is an internal routine. It is exported for testing purposes.
159func (impl Implementation) Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) {
160	const maxit = 40
161
162	initu := jobU == lapack.GSVDUnit
163	wantu := initu || jobU == lapack.GSVDU
164
165	initv := jobV == lapack.GSVDUnit
166	wantv := initv || jobV == lapack.GSVDV
167
168	initq := jobQ == lapack.GSVDUnit
169	wantq := initq || jobQ == lapack.GSVDQ
170
171	switch {
172	case !initu && !wantu && jobU != lapack.GSVDNone:
173		panic(badGSVDJob + "U")
174	case !initv && !wantv && jobV != lapack.GSVDNone:
175		panic(badGSVDJob + "V")
176	case !initq && !wantq && jobQ != lapack.GSVDNone:
177		panic(badGSVDJob + "Q")
178	case m < 0:
179		panic(mLT0)
180	case p < 0:
181		panic(pLT0)
182	case n < 0:
183		panic(nLT0)
184
185	case lda < max(1, n):
186		panic(badLdA)
187	case len(a) < (m-1)*lda+n:
188		panic(shortA)
189
190	case ldb < max(1, n):
191		panic(badLdB)
192	case len(b) < (p-1)*ldb+n:
193		panic(shortB)
194
195	case len(alpha) != n:
196		panic(badLenAlpha)
197	case len(beta) != n:
198		panic(badLenBeta)
199
200	case ldu < 1, wantu && ldu < m:
201		panic(badLdU)
202	case wantu && len(u) < (m-1)*ldu+m:
203		panic(shortU)
204
205	case ldv < 1, wantv && ldv < p:
206		panic(badLdV)
207	case wantv && len(v) < (p-1)*ldv+p:
208		panic(shortV)
209
210	case ldq < 1, wantq && ldq < n:
211		panic(badLdQ)
212	case wantq && len(q) < (n-1)*ldq+n:
213		panic(shortQ)
214
215	case len(work) < 2*n:
216		panic(shortWork)
217	}
218
219	// Initialize U, V and Q, if necessary
220	if initu {
221		impl.Dlaset(blas.All, m, m, 0, 1, u, ldu)
222	}
223	if initv {
224		impl.Dlaset(blas.All, p, p, 0, 1, v, ldv)
225	}
226	if initq {
227		impl.Dlaset(blas.All, n, n, 0, 1, q, ldq)
228	}
229
230	bi := blas64.Implementation()
231	minTol := math.Min(tola, tolb)
232
233	// Loop until convergence.
234	upper := false
235	for cycles = 1; cycles <= maxit; cycles++ {
236		upper = !upper
237
238		for i := 0; i < l-1; i++ {
239			for j := i + 1; j < l; j++ {
240				var a1, a2, a3 float64
241				if k+i < m {
242					a1 = a[(k+i)*lda+n-l+i]
243				}
244				if k+j < m {
245					a3 = a[(k+j)*lda+n-l+j]
246				}
247
248				b1 := b[i*ldb+n-l+i]
249				b3 := b[j*ldb+n-l+j]
250
251				var b2 float64
252				if upper {
253					if k+i < m {
254						a2 = a[(k+i)*lda+n-l+j]
255					}
256					b2 = b[i*ldb+n-l+j]
257				} else {
258					if k+j < m {
259						a2 = a[(k+j)*lda+n-l+i]
260					}
261					b2 = b[j*ldb+n-l+i]
262				}
263
264				csu, snu, csv, snv, csq, snq := impl.Dlags2(upper, a1, a2, a3, b1, b2, b3)
265
266				// Update (k+i)-th and (k+j)-th rows of matrix A: Uᵀ*A.
267				if k+j < m {
268					bi.Drot(l, a[(k+j)*lda+n-l:], 1, a[(k+i)*lda+n-l:], 1, csu, snu)
269				}
270
271				// Update i-th and j-th rows of matrix B: Vᵀ*B.
272				bi.Drot(l, b[j*ldb+n-l:], 1, b[i*ldb+n-l:], 1, csv, snv)
273
274				// Update (n-l+i)-th and (n-l+j)-th columns of matrices
275				// A and B: A*Q and B*Q.
276				bi.Drot(min(k+l, m), a[n-l+j:], lda, a[n-l+i:], lda, csq, snq)
277				bi.Drot(l, b[n-l+j:], ldb, b[n-l+i:], ldb, csq, snq)
278
279				if upper {
280					if k+i < m {
281						a[(k+i)*lda+n-l+j] = 0
282					}
283					b[i*ldb+n-l+j] = 0
284				} else {
285					if k+j < m {
286						a[(k+j)*lda+n-l+i] = 0
287					}
288					b[j*ldb+n-l+i] = 0
289				}
290
291				// Update orthogonal matrices U, V, Q, if desired.
292				if wantu && k+j < m {
293					bi.Drot(m, u[k+j:], ldu, u[k+i:], ldu, csu, snu)
294				}
295				if wantv {
296					bi.Drot(p, v[j:], ldv, v[i:], ldv, csv, snv)
297				}
298				if wantq {
299					bi.Drot(n, q[n-l+j:], ldq, q[n-l+i:], ldq, csq, snq)
300				}
301			}
302		}
303
304		if !upper {
305			// The matrices A13 and B13 were lower triangular at the start
306			// of the cycle, and are now upper triangular.
307			//
308			// Convergence test: test the parallelism of the corresponding
309			// rows of A and B.
310			var error float64
311			for i := 0; i < min(l, m-k); i++ {
312				bi.Dcopy(l-i, a[(k+i)*lda+n-l+i:], 1, work, 1)
313				bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, work[l:], 1)
314				ssmin := impl.Dlapll(l-i, work, 1, work[l:], 1)
315				error = math.Max(error, ssmin)
316			}
317			if math.Abs(error) <= minTol {
318				// The algorithm has converged.
319				// Compute the generalized singular value pairs (alpha, beta)
320				// and set the triangular matrix R to array A.
321				for i := 0; i < k; i++ {
322					alpha[i] = 1
323					beta[i] = 0
324				}
325
326				for i := 0; i < min(l, m-k); i++ {
327					a1 := a[(k+i)*lda+n-l+i]
328					b1 := b[i*ldb+n-l+i]
329
330					if a1 != 0 {
331						gamma := b1 / a1
332
333						// Change sign if necessary.
334						if gamma < 0 {
335							bi.Dscal(l-i, -1, b[i*ldb+n-l+i:], 1)
336							if wantv {
337								bi.Dscal(p, -1, v[i:], ldv)
338							}
339						}
340						beta[k+i], alpha[k+i], _ = impl.Dlartg(math.Abs(gamma), 1)
341
342						if alpha[k+i] >= beta[k+i] {
343							bi.Dscal(l-i, 1/alpha[k+i], a[(k+i)*lda+n-l+i:], 1)
344						} else {
345							bi.Dscal(l-i, 1/beta[k+i], b[i*ldb+n-l+i:], 1)
346							bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1)
347						}
348					} else {
349						alpha[k+i] = 0
350						beta[k+i] = 1
351						bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1)
352					}
353				}
354
355				for i := m; i < k+l; i++ {
356					alpha[i] = 0
357					beta[i] = 1
358				}
359				if k+l < n {
360					for i := k + l; i < n; i++ {
361						alpha[i] = 0
362						beta[i] = 0
363					}
364				}
365
366				return cycles, true
367			}
368		}
369	}
370
371	// The algorithm has not converged after maxit cycles.
372	return cycles, false
373}
374