1// Copyright ©2015 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package gonum
6
7import (
8	"math"
9
10	"gonum.org/v1/gonum/blas"
11	"gonum.org/v1/gonum/blas/blas64"
12	"gonum.org/v1/gonum/lapack"
13)
14
15const noSVDO = "dgesvd: not coded for overwrite"
16
17// Dgesvd computes the singular value decomposition of the input matrix A.
18//
19// The singular value decomposition is
20//  A = U * Sigma * Vᵀ
21// where Sigma is an m×n diagonal matrix containing the singular values of A,
22// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
23// min(m,n) columns of U and V are the left and right singular vectors of A
24// respectively.
25//
26// jobU and jobVT are options for computing the singular vectors. The behavior
27// is as follows
28//  jobU == lapack.SVDAll       All m columns of U are returned in u
29//  jobU == lapack.SVDStore     The first min(m,n) columns are returned in u
30//  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
31//  jobU == lapack.SVDNone      The columns of U are not computed.
32// The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU
33// and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
34//
35// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
36// the data is overwritten. On exit, A contains the appropriate singular vectors
37// if either job is lapack.SVDOverwrite.
38//
39// s is a slice of length at least min(m,n) and on exit contains the singular
40// values in decreasing order.
41//
42// u contains the left singular vectors on exit, stored column-wise. If
43// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is
44// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
45// not used.
46//
47// vt contains the left singular vectors on exit, stored row-wise. If
48// jobV == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is
49// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
50// not used.
51//
52// work is a slice for storing temporary memory, and lwork is the usable size of
53// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
54// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
55// stored into work[0]. Dgesvd will panic if the working memory has insufficient
56// storage.
57//
58// Dgesvd returns whether the decomposition successfully completed.
59func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
60	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
61		panic(noSVDO)
62	}
63
64	wantua := jobU == lapack.SVDAll
65	wantus := jobU == lapack.SVDStore
66	wantuas := wantua || wantus
67	wantuo := jobU == lapack.SVDOverwrite
68	wantun := jobU == lapack.SVDNone
69	if !(wantua || wantus || wantuo || wantun) {
70		panic(badSVDJob)
71	}
72
73	wantva := jobVT == lapack.SVDAll
74	wantvs := jobVT == lapack.SVDStore
75	wantvas := wantva || wantvs
76	wantvo := jobVT == lapack.SVDOverwrite
77	wantvn := jobVT == lapack.SVDNone
78	if !(wantva || wantvs || wantvo || wantvn) {
79		panic(badSVDJob)
80	}
81
82	if wantuo && wantvo {
83		panic(bothSVDOver)
84	}
85
86	minmn := min(m, n)
87	minwork := 1
88	if minmn > 0 {
89		minwork = max(3*minmn+max(m, n), 5*minmn)
90	}
91	switch {
92	case m < 0:
93		panic(mLT0)
94	case n < 0:
95		panic(nLT0)
96	case lda < max(1, n):
97		panic(badLdA)
98	case ldu < 1, wantua && ldu < m, wantus && ldu < minmn:
99		panic(badLdU)
100	case ldvt < 1 || (wantvas && ldvt < n):
101		panic(badLdVT)
102	case lwork < minwork && lwork != -1:
103		panic(badLWork)
104	case len(work) < max(1, lwork):
105		panic(shortWork)
106	}
107
108	// Quick return if possible.
109	if minmn == 0 {
110		work[0] = 1
111		return true
112	}
113
114	// Compute optimal workspace size for subroutines.
115	opts := string(jobU) + string(jobVT)
116	mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
117	maxwrk := 1
118	var wrkbl, bdspac int
119	if m >= n {
120		bdspac = 5 * n
121		impl.Dgeqrf(m, n, a, lda, nil, work, -1)
122		lwork_dgeqrf := int(work[0])
123
124		impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
125		lwork_dorgqr_n := int(work[0])
126		impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
127		lwork_dorgqr_m := int(work[0])
128
129		impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
130		lwork_dgebrd := int(work[0])
131
132		impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1)
133		lwork_dorgbr_p := int(work[0])
134
135		impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1)
136		lwork_dorgbr_q := int(work[0])
137
138		if m >= mnthr {
139			if wantun {
140				// Path 1 (m much larger than n, jobU == None)
141				maxwrk = n + lwork_dgeqrf
142				maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
143				if wantvo || wantvas {
144					maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
145				}
146				maxwrk = max(maxwrk, bdspac)
147			} else if wantuo && wantvn {
148				// Path 2 (m much larger than n, jobU == Overwrite, jobVT == None)
149				wrkbl = n + lwork_dgeqrf
150				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
151				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
152				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
153				wrkbl = max(wrkbl, bdspac)
154				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
155			} else if wantuo && wantvas {
156				// Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All)
157				wrkbl = n + lwork_dgeqrf
158				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
159				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
160				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
161				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
162				wrkbl = max(wrkbl, bdspac)
163				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
164			} else if wantus && wantvn {
165				// Path 4 (m much larger than n, jobU == Store, jobVT == None)
166				wrkbl = n + lwork_dgeqrf
167				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
168				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
169				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
170				wrkbl = max(wrkbl, bdspac)
171				maxwrk = n*n + wrkbl
172			} else if wantus && wantvo {
173				// Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite)
174				wrkbl = n + lwork_dgeqrf
175				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
176				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
177				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
178				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
179				wrkbl = max(wrkbl, bdspac)
180				maxwrk = 2*n*n + wrkbl
181			} else if wantus && wantvas {
182				// Path 6 (m much larger than n, jobU == Store, jobVT == Store or All)
183				wrkbl = n + lwork_dgeqrf
184				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
185				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
186				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
187				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
188				wrkbl = max(wrkbl, bdspac)
189				maxwrk = n*n + wrkbl
190			} else if wantua && wantvn {
191				// Path 7 (m much larger than n, jobU == All, jobVT == None)
192				wrkbl = n + lwork_dgeqrf
193				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
194				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
195				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
196				wrkbl = max(wrkbl, bdspac)
197				maxwrk = n*n + wrkbl
198			} else if wantua && wantvo {
199				// Path 8 (m much larger than n, jobU == All, jobVT == Overwrite)
200				wrkbl = n + lwork_dgeqrf
201				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
202				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
203				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
204				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
205				wrkbl = max(wrkbl, bdspac)
206				maxwrk = 2*n*n + wrkbl
207			} else if wantua && wantvas {
208				// Path 9 (m much larger than n, jobU == All, jobVT == Store or All)
209				wrkbl = n + lwork_dgeqrf
210				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
211				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
212				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
213				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
214				wrkbl = max(wrkbl, bdspac)
215				maxwrk = n*n + wrkbl
216			}
217		} else {
218			// Path 10 (m at least n, but not much larger)
219			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
220			lwork_dgebrd := int(work[0])
221			maxwrk = 3*n + lwork_dgebrd
222			if wantus || wantuo {
223				impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1)
224				lwork_dorgbr_q = int(work[0])
225				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
226			}
227			if wantua {
228				impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1)
229				lwork_dorgbr_q := int(work[0])
230				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
231			}
232			if !wantvn {
233				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
234			}
235			maxwrk = max(maxwrk, bdspac)
236		}
237	} else {
238		bdspac = 5 * m
239
240		impl.Dgelqf(m, n, a, lda, nil, work, -1)
241		lwork_dgelqf := int(work[0])
242
243		impl.Dorglq(n, n, m, nil, n, nil, work, -1)
244		lwork_dorglq_n := int(work[0])
245		impl.Dorglq(m, n, m, a, lda, nil, work, -1)
246		lwork_dorglq_m := int(work[0])
247
248		impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
249		lwork_dgebrd := int(work[0])
250
251		impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1)
252		lwork_dorgbr_p := int(work[0])
253
254		impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1)
255		lwork_dorgbr_q := int(work[0])
256
257		if n >= mnthr {
258			if wantvn {
259				// Path 1t (n much larger than m, jobVT == None)
260				maxwrk = m + lwork_dgelqf
261				maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
262				if wantuo || wantuas {
263					maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
264				}
265				maxwrk = max(maxwrk, bdspac)
266			} else if wantvo && wantun {
267				// Path 2t (n much larger than m, jobU == None, jobVT == Overwrite)
268				wrkbl = m + lwork_dgelqf
269				wrkbl = max(wrkbl, m+lwork_dorglq_m)
270				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
271				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
272				wrkbl = max(wrkbl, bdspac)
273				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
274			} else if wantvo && wantuas {
275				// Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite)
276				wrkbl = m + lwork_dgelqf
277				wrkbl = max(wrkbl, m+lwork_dorglq_m)
278				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
279				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
280				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
281				wrkbl = max(wrkbl, bdspac)
282				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
283			} else if wantvs && wantun {
284				// Path 4t (n much larger than m, jobU == None, jobVT == Store)
285				wrkbl = m + lwork_dgelqf
286				wrkbl = max(wrkbl, m+lwork_dorglq_m)
287				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
288				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
289				wrkbl = max(wrkbl, bdspac)
290				maxwrk = m*m + wrkbl
291			} else if wantvs && wantuo {
292				// Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store)
293				wrkbl = m + lwork_dgelqf
294				wrkbl = max(wrkbl, m+lwork_dorglq_m)
295				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
296				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
297				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
298				wrkbl = max(wrkbl, bdspac)
299				maxwrk = 2*m*m + wrkbl
300			} else if wantvs && wantuas {
301				// Path 6t (n much larger than m, jobU == Store or All, jobVT == Store)
302				wrkbl = m + lwork_dgelqf
303				wrkbl = max(wrkbl, m+lwork_dorglq_m)
304				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
305				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
306				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
307				wrkbl = max(wrkbl, bdspac)
308				maxwrk = m*m + wrkbl
309			} else if wantva && wantun {
310				// Path 7t (n much larger than m, jobU== None, jobVT == All)
311				wrkbl = m + lwork_dgelqf
312				wrkbl = max(wrkbl, m+lwork_dorglq_n)
313				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
314				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
315				wrkbl = max(wrkbl, bdspac)
316				maxwrk = m*m + wrkbl
317			} else if wantva && wantuo {
318				// Path 8t (n much larger than m, jobU == Overwrite, jobVT == All)
319				wrkbl = m + lwork_dgelqf
320				wrkbl = max(wrkbl, m+lwork_dorglq_n)
321				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
322				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
323				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
324				wrkbl = max(wrkbl, bdspac)
325				maxwrk = 2*m*m + wrkbl
326			} else if wantva && wantuas {
327				// Path 9t (n much larger than m, jobU == Store or All, jobVT == All)
328				wrkbl = m + lwork_dgelqf
329				wrkbl = max(wrkbl, m+lwork_dorglq_n)
330				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
331				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
332				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
333				wrkbl = max(wrkbl, bdspac)
334				maxwrk = m*m + wrkbl
335			}
336		} else {
337			// Path 10t (n greater than m, but not much larger)
338			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
339			lwork_dgebrd = int(work[0])
340			maxwrk = 3*m + lwork_dgebrd
341			if wantvs || wantvo {
342				impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1)
343				lwork_dorgbr_p = int(work[0])
344				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
345			}
346			if wantva {
347				impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1)
348				lwork_dorgbr_p = int(work[0])
349				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
350			}
351			if !wantun {
352				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
353			}
354			maxwrk = max(maxwrk, bdspac)
355		}
356	}
357
358	maxwrk = max(maxwrk, minwork)
359	if lwork == -1 {
360		work[0] = float64(maxwrk)
361		return true
362	}
363
364	if len(a) < (m-1)*lda+n {
365		panic(shortA)
366	}
367	if len(s) < minmn {
368		panic(shortS)
369	}
370	if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) {
371		panic(shortU)
372	}
373	if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) {
374		panic(shortVT)
375	}
376
377	// Perform decomposition.
378	eps := dlamchE
379	smlnum := math.Sqrt(dlamchS) / eps
380	bignum := 1 / smlnum
381
382	// Scale A if max element outside range [smlnum, bignum].
383	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
384	var iscl bool
385	if anrm > 0 && anrm < smlnum {
386		iscl = true
387		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
388	} else if anrm > bignum {
389		iscl = true
390		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
391	}
392
393	bi := blas64.Implementation()
394	var ie int
395	if m >= n {
396		// If A has sufficiently more rows than columns, use the QR decomposition.
397		if m >= mnthr {
398			// m >> n
399			if wantun {
400				// Path 1.
401				itau := 0
402				iwork := itau + n
403
404				// Compute A = Q * R.
405				impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
406
407				// Zero out below R.
408				impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
409				ie = 0
410				itauq := ie + n
411				itaup := itauq + n
412				iwork = itaup + n
413				// Bidiagonalize R in A.
414				impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
415					work[itaup:], work[iwork:], lwork-iwork)
416				ncvt := 0
417				if wantvo || wantvas {
418					impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:],
419						work[iwork:], lwork-iwork)
420					ncvt = n
421				}
422				iwork = ie + n
423
424				// Perform bidiagonal QR iteration computing right singular vectors
425				// of A in A if desired.
426				ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
427					a, lda, work, 1, work, 1, work[iwork:])
428
429				// If right singular vectors desired in VT, copy them there.
430				if wantvas {
431					impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
432				}
433			} else if wantuo && wantvn {
434				// Path 2
435				panic(noSVDO)
436			} else if wantuo && wantvas {
437				// Path 3
438				panic(noSVDO)
439			} else if wantus {
440				if wantvn {
441					// Path 4
442					if lwork >= n*n+max(4*n, bdspac) {
443						// Sufficient workspace for a fast algorithm.
444						ir := 0
445						var ldworkr int
446						if lwork >= wrkbl+lda*n {
447							ldworkr = lda
448						} else {
449							ldworkr = n
450						}
451						itau := ir + ldworkr*n
452						iwork := itau + n
453						// Compute A = Q * R.
454						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
455
456						// Copy R to work[ir:], zeroing out below it.
457						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
458						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
459
460						// Generate Q in A.
461						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
462						ie := itau
463						itauq := ie + n
464						itaup := itauq + n
465						iwork = itaup + n
466
467						// Bidiagonalize R in work[ir:].
468						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
469							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
470
471						// Generate left vectors bidiagonalizing R in work[ir:].
472						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
473							work[itauq:], work[iwork:], lwork-iwork)
474						iwork = ie + n
475
476						// Perform bidiagonal QR iteration, compuing left singular
477						// vectors of R in work[ir:].
478						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
479							work[ir:], ldworkr, work, 1, work[iwork:])
480
481						// Multiply Q in A by left singular vectors of R in
482						// work[ir:], storing result in U.
483						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
484							work[ir:], ldworkr, 0, u, ldu)
485					} else {
486						// Insufficient workspace for a fast algorithm.
487						itau := 0
488						iwork := itau + n
489
490						// Compute A = Q*R, copying result to U.
491						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
492						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
493
494						// Generate Q in U.
495						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
496						ie := itau
497						itauq := ie + n
498						itaup := itauq + n
499						iwork = itaup + n
500
501						// Zero out below R in A.
502						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
503
504						// Bidiagonalize R in A.
505						impl.Dgebrd(n, n, a, lda, s, work[ie:],
506							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
507
508						// Multiply Q in U by left vectors bidiagonalizing R.
509						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
510							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
511						iwork = ie + n
512
513						// Perform bidiagonal QR iteration, computing left
514						// singular vectors of A in U.
515						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
516							u, ldu, work, 1, work[iwork:])
517					}
518				} else if wantvo {
519					// Path 5
520					panic(noSVDO)
521				} else if wantvas {
522					// Path 6
523					if lwork >= n*n+max(4*n, bdspac) {
524						// Sufficient workspace for a fast algorithm.
525						iu := 0
526						var ldworku int
527						if lwork >= wrkbl+lda*n {
528							ldworku = lda
529						} else {
530							ldworku = n
531						}
532						itau := iu + ldworku*n
533						iwork := itau + n
534
535						// Compute A = Q * R.
536						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
537						// Copy R to work[iu:], zeroing out below it.
538						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
539						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
540
541						// Generate Q in A.
542						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
543
544						ie := itau
545						itauq := ie + n
546						itaup := itauq + n
547						iwork = itaup + n
548
549						// Bidiagonalize R in work[iu:], copying result to VT.
550						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
551							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
552						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
553
554						// Generate left bidiagonalizing vectors in work[iu:].
555						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
556							work[itauq:], work[iwork:], lwork-iwork)
557
558						// Generate right bidiagonalizing vectors in VT.
559						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
560							work[itaup:], work[iwork:], lwork-iwork)
561						iwork = ie + n
562
563						// Perform bidiagonal QR iteration, computing left singular
564						// vectors of R in work[iu:], and computing right singular
565						// vectors of R in VT.
566						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
567							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
568
569						// Multiply Q in A by left singular vectors of R in
570						// work[iu:], storing result in U.
571						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
572							work[iu:], ldworku, 0, u, ldu)
573					} else {
574						// Insufficient workspace for a fast algorithm.
575						itau := 0
576						iwork := itau + n
577
578						// Compute A = Q * R, copying result to U.
579						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
580						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
581
582						// Generate Q in U.
583						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
584
585						// Copy R to VT, zeroing out below it.
586						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
587						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
588
589						ie := itau
590						itauq := ie + n
591						itaup := itauq + n
592						iwork = itaup + n
593
594						// Bidiagonalize R in VT.
595						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
596							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
597
598						// Multiply Q in U by left bidiagonalizing vectors in VT.
599						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
600							vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
601
602						// Generate right bidiagonalizing vectors in VT.
603						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
604							work[itaup:], work[iwork:], lwork-iwork)
605						iwork = ie + n
606
607						// Perform bidiagonal QR iteration, computing left singular
608						// vectors of A in U and computing right singular vectors
609						// of A in VT.
610						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
611							vt, ldvt, u, ldu, work, 1, work[iwork:])
612					}
613				}
614			} else if wantua {
615				if wantvn {
616					// Path 7
617					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
618						// Sufficient workspace for a fast algorithm.
619						ir := 0
620						var ldworkr int
621						if lwork >= wrkbl+lda*n {
622							ldworkr = lda
623						} else {
624							ldworkr = n
625						}
626						itau := ir + ldworkr*n
627						iwork := itau + n
628
629						// Compute A = Q*R, copying result to U.
630						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
631						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
632
633						// Copy R to work[ir:], zeroing out below it.
634						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
635						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
636
637						// Generate Q in U.
638						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
639						ie := itau
640						itauq := ie + n
641						itaup := itauq + n
642						iwork = itaup + n
643
644						// Bidiagonalize R in work[ir:].
645						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
646							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
647
648						// Generate left bidiagonalizing vectors in work[ir:].
649						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
650							work[itauq:], work[iwork:], lwork-iwork)
651						iwork = ie + n
652
653						// Perform bidiagonal QR iteration, computing left singular
654						// vectors of R in work[ir:].
655						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
656							work[ir:], ldworkr, work, 1, work[iwork:])
657
658						// Multiply Q in U by left singular vectors of R in
659						// work[ir:], storing result in A.
660						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
661							work[ir:], ldworkr, 0, a, lda)
662
663						// Copy left singular vectors of A from A to U.
664						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
665					} else {
666						// Insufficient workspace for a fast algorithm.
667						itau := 0
668						iwork := itau + n
669
670						// Compute A = Q*R, copying result to U.
671						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
672						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
673
674						// Generate Q in U.
675						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
676						ie := itau
677						itauq := ie + n
678						itaup := itauq + n
679						iwork = itaup + n
680
681						// Zero out below R in A.
682						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
683
684						// Bidiagonalize R in A.
685						impl.Dgebrd(n, n, a, lda, s, work[ie:],
686							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
687
688						// Multiply Q in U by left bidiagonalizing vectors in A.
689						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
690							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
691						iwork = ie + n
692
693						// Perform bidiagonal QR iteration, computing left
694						// singular vectors of A in U.
695						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
696							work, 1, u, ldu, work, 1, work[iwork:])
697					}
698				} else if wantvo {
699					// Path 8.
700					panic(noSVDO)
701				} else if wantvas {
702					// Path 9.
703					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
704						// Sufficient workspace for a fast algorithm.
705						iu := 0
706						var ldworku int
707						if lwork >= wrkbl+lda*n {
708							ldworku = lda
709						} else {
710							ldworku = n
711						}
712						itau := iu + ldworku*n
713						iwork := itau + n
714
715						// Compute A = Q * R, copying result to U.
716						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
717						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
718
719						// Generate Q in U.
720						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
721
722						// Copy R to work[iu:], zeroing out below it.
723						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
724						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
725
726						ie = itau
727						itauq := ie + n
728						itaup := itauq + n
729						iwork = itaup + n
730
731						// Bidiagonalize R in work[iu:], copying result to VT.
732						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
733							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
734						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
735
736						// Generate left bidiagonalizing vectors in work[iu:].
737						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
738							work[itauq:], work[iwork:], lwork-iwork)
739
740						// Generate right bidiagonalizing vectors in VT.
741						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
742							work[itaup:], work[iwork:], lwork-iwork)
743						iwork = ie + n
744
745						// Perform bidiagonal QR iteration, computing left singular
746						// vectors of R in work[iu:] and computing right
747						// singular vectors of R in VT.
748						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
749							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
750
751						// Multiply Q in U by left singular vectors of R in
752						// work[iu:], storing result in A.
753						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
754							u, ldu, work[iu:], ldworku, 0, a, lda)
755
756						// Copy left singular vectors of A from A to U.
757						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
758
759						/*
760							// Bidiagonalize R in VT.
761							impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
762								work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
763
764							// Multiply Q in U by left bidiagonalizing vectors in VT.
765							impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
766								m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
767
768							// Generate right bidiagonalizing vectors in VT.
769							impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
770								work[itaup:], work[iwork:], lwork-iwork)
771							iwork = ie + n
772
773							// Perform bidiagonal QR iteration, computing left singular
774							// vectors of A in U and computing right singular vectors
775							// of A in VT.
776							ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
777								vt, ldvt, u, ldu, work, 1, work[iwork:])
778						*/
779					} else {
780						// Insufficient workspace for a fast algorithm.
781						itau := 0
782						iwork := itau + n
783
784						// Compute A = Q*R, copying result to U.
785						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
786						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
787
788						// Generate Q in U.
789						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
790
791						// Copy R from A to VT, zeroing out below it.
792						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
793						if n > 1 {
794							impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
795						}
796
797						ie := itau
798						itauq := ie + n
799						itaup := itauq + n
800						iwork = itaup + n
801
802						// Bidiagonalize R in VT.
803						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
804							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
805
806						// Multiply Q in U by left bidiagonalizing vectors in VT.
807						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
808							m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
809
810						// Generate right bidiagonizing vectors in VT.
811						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
812							work[itaup:], work[iwork:], lwork-iwork)
813						iwork = ie + n
814
815						// Perform bidiagonal QR iteration, computing left singular
816						// vectors of A in U and computing right singular vectors
817						// of A in VT.
818						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
819							vt, ldvt, u, ldu, work, 1, work[iwork:])
820					}
821				}
822			}
823		} else {
824			// Path 10.
825			// M at least N, but not much larger.
826			ie = 0
827			itauq := ie + n
828			itaup := itauq + n
829			iwork := itaup + n
830
831			// Bidiagonalize A.
832			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
833				work[itaup:], work[iwork:], lwork-iwork)
834			if wantuas {
835				// Left singular vectors are desired in U. Copy result to U and
836				// generate left biadiagonalizing vectors in U.
837				impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
838				var ncu int
839				if wantus {
840					ncu = n
841				}
842				if wantua {
843					ncu = m
844				}
845				impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
846			}
847			if wantvas {
848				// Right singular vectors are desired in VT. Copy result to VT and
849				// generate left biadiagonalizing vectors in VT.
850				impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
851				impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
852			}
853			if wantuo {
854				panic(noSVDO)
855			}
856			if wantvo {
857				panic(noSVDO)
858			}
859			iwork = ie + n
860			var nru, ncvt int
861			if wantuas || wantuo {
862				nru = m
863			}
864			if wantun {
865				nru = 0
866			}
867			if wantvas || wantvo {
868				ncvt = n
869			}
870			if wantvn {
871				ncvt = 0
872			}
873			if !wantuo && !wantvo {
874				// Perform bidiagonal QR iteration, if desired, computing left
875				// singular vectors in U and right singular vectors in VT.
876				ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
877					vt, ldvt, u, ldu, work, 1, work[iwork:])
878			} else {
879				// There will be two branches when the implementation is complete.
880				panic(noSVDO)
881			}
882		}
883	} else {
884		// A has more columns than rows. If A has sufficiently more columns than
885		// rows, first reduce using the LQ decomposition.
886		if n >= mnthr {
887			// n >> m.
888			if wantvn {
889				// Path 1t.
890				itau := 0
891				iwork := itau + m
892
893				// Compute A = L*Q.
894				impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
895
896				// Zero out above L.
897				impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
898				ie := 0
899				itauq := ie + m
900				itaup := itauq + m
901				iwork = itaup + m
902
903				// Bidiagonalize L in A.
904				impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
905					work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
906				if wantuo || wantuas {
907					impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda,
908						work[itauq:], work[iwork:], lwork-iwork)
909				}
910				iwork = ie + m
911				nru := 0
912				if wantuo || wantuas {
913					nru = m
914				}
915
916				// Perform bidiagonal QR iteration, computing left singular vectors
917				// of A in A if desired.
918				ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
919					work, 1, a, lda, work, 1, work[iwork:])
920
921				// If left singular vectors desired in U, copy them there.
922				if wantuas {
923					impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
924				}
925			} else if wantvo && wantun {
926				// Path 2t.
927				panic(noSVDO)
928			} else if wantvo && wantuas {
929				// Path 3t.
930				panic(noSVDO)
931			} else if wantvs {
932				if wantun {
933					// Path 4t.
934					if lwork >= m*m+max(4*m, bdspac) {
935						// Sufficient workspace for a fast algorithm.
936						ir := 0
937						var ldworkr int
938						if lwork >= wrkbl+lda*m {
939							ldworkr = lda
940						} else {
941							ldworkr = m
942						}
943						itau := ir + ldworkr*m
944						iwork := itau + m
945
946						// Compute A = L*Q.
947						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
948
949						// Copy L to work[ir:], zeroing out above it.
950						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
951						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
952
953						// Generate Q in A.
954						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
955						ie := itau
956						itauq := ie + m
957						itaup := itauq + m
958						iwork = itaup + m
959
960						// Bidiagonalize L in work[ir:].
961						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
962							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
963
964						// Generate right vectors bidiagonalizing L in work[ir:].
965						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
966							work[itaup:], work[iwork:], lwork-iwork)
967						iwork = ie + m
968
969						// Perform bidiagonal QR iteration, computing right singular
970						// vectors of L in work[ir:].
971						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
972							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
973
974						// Multiply right singular vectors of L in work[ir:] by
975						// Q in A, storing result in VT.
976						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
977							work[ir:], ldworkr, a, lda, 0, vt, ldvt)
978					} else {
979						// Insufficient workspace for a fast algorithm.
980						itau := 0
981						iwork := itau + m
982
983						// Compute A = L*Q.
984						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
985
986						// Copy result to VT.
987						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
988
989						// Generate Q in VT.
990						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
991						ie := itau
992						itauq := ie + m
993						itaup := itauq + m
994						iwork = itaup + m
995
996						// Zero out above L in A.
997						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
998
999						// Bidiagonalize L in A.
1000						impl.Dgebrd(m, m, a, lda, s, work[ie:],
1001							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1002
1003						// Multiply right vectors bidiagonalizing L by Q in VT.
1004						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1005							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1006						iwork = ie + m
1007
1008						// Perform bidiagonal QR iteration, computing right
1009						// singular vectors of A in VT.
1010						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
1011							vt, ldvt, work, 1, work, 1, work[iwork:])
1012					}
1013				} else if wantuo {
1014					// Path 5t.
1015					panic(noSVDO)
1016				} else if wantuas {
1017					// Path 6t.
1018					if lwork >= m*m+max(4*m, bdspac) {
1019						// Sufficient workspace for a fast algorithm.
1020						iu := 0
1021						var ldworku int
1022						if lwork >= wrkbl+lda*m {
1023							ldworku = lda
1024						} else {
1025							ldworku = m
1026						}
1027						itau := iu + ldworku*m
1028						iwork := itau + m
1029
1030						// Compute A = L*Q.
1031						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1032
1033						// Copy L to work[iu:], zeroing out above it.
1034						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
1035						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
1036
1037						// Generate Q in A.
1038						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
1039						ie := itau
1040						itauq := ie + m
1041						itaup := itauq + m
1042						iwork = itaup + m
1043
1044						// Bidiagonalize L in work[iu:], copying result to U.
1045						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
1046							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1047						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
1048
1049						// Generate right bidiagionalizing vectors in work[iu:].
1050						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
1051							work[itaup:], work[iwork:], lwork-iwork)
1052
1053						// Generate left bidiagonalizing vectors in U.
1054						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1055						iwork = ie + m
1056
1057						// Perform bidiagonal QR iteration, computing left singular
1058						// vectors of L in U and computing right singular vectors of
1059						// L in work[iu:].
1060						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
1061							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
1062
1063						// Multiply right singular vectors of L in work[iu:] by
1064						// Q in A, storing result in VT.
1065						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1066							work[iu:], ldworku, a, lda, 0, vt, ldvt)
1067					} else {
1068						// Insufficient workspace for a fast algorithm.
1069						itau := 0
1070						iwork := itau + m
1071
1072						// Compute A = L*Q, copying result to VT.
1073						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1074						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1075
1076						// Generate Q in VT.
1077						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1078
1079						// Copy L to U, zeroing out above it.
1080						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1081						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
1082
1083						ie := itau
1084						itauq := ie + m
1085						itaup := itauq + m
1086						iwork = itaup + m
1087
1088						// Bidiagonalize L in U.
1089						impl.Dgebrd(m, m, u, ldu, s, work[ie:],
1090							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1091
1092						// Multiply right bidiagonalizing vectors in U by Q in VT.
1093						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1094							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1095
1096						// Generate left bidiagonalizing vectors in U.
1097						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1098						iwork = ie + m
1099
1100						// Perform bidiagonal QR iteration, computing left singular
1101						// vectors of A in U and computing right singular vectors
1102						// of A in VT.
1103						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
1104							u, ldu, work, 1, work[iwork:])
1105					}
1106				}
1107			} else if wantva {
1108				if wantun {
1109					// Path 7t.
1110					if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
1111						// Sufficient workspace for a fast algorithm.
1112						ir := 0
1113						var ldworkr int
1114						if lwork >= wrkbl+lda*m {
1115							ldworkr = lda
1116						} else {
1117							ldworkr = m
1118						}
1119						itau := ir + ldworkr*m
1120						iwork := itau + m
1121
1122						// Compute A = L*Q, copying result to VT.
1123						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1124						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1125
1126						// Copy L to work[ir:], zeroing out above it.
1127						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
1128						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
1129
1130						// Generate Q in VT.
1131						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1132
1133						ie := itau
1134						itauq := ie + m
1135						itaup := itauq + m
1136						iwork = itaup + m
1137
1138						// Bidiagonalize L in work[ir:].
1139						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
1140							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1141
1142						// Generate right bidiagonalizing vectors in work[ir:].
1143						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
1144							work[itaup:], work[iwork:], lwork-iwork)
1145						iwork = ie + m
1146
1147						// Perform bidiagonal QR iteration, computing right
1148						// singular vectors of L in work[ir:].
1149						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
1150							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
1151
1152						// Multiply right singular vectors of L in work[ir:] by
1153						// Q in VT, storing result in A.
1154						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1155							work[ir:], ldworkr, vt, ldvt, 0, a, lda)
1156
1157						// Copy right singular vectors of A from A to VT.
1158						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
1159					} else {
1160						// Insufficient workspace for a fast algorithm.
1161						itau := 0
1162						iwork := itau + m
1163						// Compute A = L * Q, copying result to VT.
1164						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1165						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1166
1167						// Generate Q in VT.
1168						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1169
1170						ie := itau
1171						itauq := ie + m
1172						itaup := itauq + m
1173						iwork = itaup + m
1174
1175						// Zero out above L in A.
1176						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
1177
1178						// Bidiagonalize L in A.
1179						impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
1180							work[itaup:], work[iwork:], lwork-iwork)
1181
1182						// Multiply right bidiagonalizing vectors in A by Q in VT.
1183						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1184							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1185						iwork = ie + m
1186
1187						// Perform bidiagonal QR iteration, computing right singular
1188						// vectors of A in VT.
1189						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
1190							vt, ldvt, work, 1, work, 1, work[iwork:])
1191					}
1192				} else if wantuo {
1193					panic(noSVDO)
1194				} else if wantuas {
1195					// Path 9t.
1196					if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
1197						// Sufficient workspace for a fast algorithm.
1198						iu := 0
1199
1200						var ldworku int
1201						if lwork >= wrkbl+lda*m {
1202							ldworku = lda
1203						} else {
1204							ldworku = m
1205						}
1206						itau := iu + ldworku*m
1207						iwork := itau + m
1208
1209						// Generate A = L * Q copying result to VT.
1210						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1211						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1212
1213						// Generate Q in VT.
1214						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1215
1216						// Copy L to work[iu:], zeroing out above it.
1217						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
1218						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
1219						ie = itau
1220						itauq := ie + m
1221						itaup := itauq + m
1222						iwork = itaup + m
1223
1224						// Bidiagonalize L in work[iu:], copying result to U.
1225						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
1226							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1227						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
1228
1229						// Generate right bidiagonalizing vectors in work[iu:].
1230						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
1231							work[itaup:], work[iwork:], lwork-iwork)
1232
1233						// Generate left bidiagonalizing vectors in U.
1234						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1235						iwork = ie + m
1236
1237						// Perform bidiagonal QR iteration, computing left singular
1238						// vectors of L in U and computing right singular vectors
1239						// of L in work[iu:].
1240						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
1241							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
1242
1243						// Multiply right singular vectors of L in work[iu:]
1244						// Q in VT, storing result in A.
1245						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1246							work[iu:], ldworku, vt, ldvt, 0, a, lda)
1247
1248						// Copy right singular vectors of A from A to VT.
1249						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
1250					} else {
1251						// Insufficient workspace for a fast algorithm.
1252						itau := 0
1253						iwork := itau + m
1254
1255						// Compute A = L * Q, copying result to VT.
1256						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1257						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1258
1259						// Generate Q in VT.
1260						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1261
1262						// Copy L to U, zeroing out above it.
1263						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1264						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
1265
1266						ie = itau
1267						itauq := ie + m
1268						itaup := itauq + m
1269						iwork = itaup + m
1270
1271						// Bidiagonalize L in U.
1272						impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
1273							work[itaup:], work[iwork:], lwork-iwork)
1274
1275						// Multiply right bidiagonalizing vectors in U by Q in VT.
1276						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1277							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1278
1279						// Generate left bidiagonalizing vectors in U.
1280						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1281						iwork = ie + m
1282
1283						// Perform bidiagonal QR iteration, computing left singular
1284						// vectors of A in U and computing right singular vectors
1285						// of A in VT.
1286						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
1287							vt, ldvt, u, ldu, work, 1, work[iwork:])
1288					}
1289				}
1290			}
1291		} else {
1292			// Path 10t.
1293			// N at least M, but not much larger.
1294			ie = 0
1295			itauq := ie + m
1296			itaup := itauq + m
1297			iwork := itaup + m
1298
1299			// Bidiagonalize A.
1300			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1301			if wantuas {
1302				// If left singular vectors desired in U, copy result to U and
1303				// generate left bidiagonalizing vectors in U.
1304				impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1305				impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1306			}
1307			if wantvas {
1308				// If right singular vectors desired in VT, copy result to VT
1309				// and generate right bidiagonalizing vectors in VT.
1310				impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1311				var nrvt int
1312				if wantva {
1313					nrvt = n
1314				} else {
1315					nrvt = m
1316				}
1317				impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
1318			}
1319			if wantuo {
1320				panic(noSVDO)
1321			}
1322			if wantvo {
1323				panic(noSVDO)
1324			}
1325			iwork = ie + m
1326			var nru, ncvt int
1327			if wantuas || wantuo {
1328				nru = m
1329			}
1330			if wantvas || wantvo {
1331				ncvt = n
1332			}
1333			if !wantuo && !wantvo {
1334				// Perform bidiagonal QR iteration, if desired, computing left
1335				// singular vectors in U and computing right singular vectors in
1336				// VT.
1337				ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
1338					vt, ldvt, u, ldu, work, 1, work[iwork:])
1339			} else {
1340				// There will be two branches when the implementation is complete.
1341				panic(noSVDO)
1342			}
1343		}
1344	}
1345	if !ok {
1346		if ie > 1 {
1347			for i := 0; i < minmn-1; i++ {
1348				work[i+1] = work[i+ie]
1349			}
1350		}
1351		if ie < 1 {
1352			for i := minmn - 2; i >= 0; i-- {
1353				work[i+1] = work[i+ie]
1354			}
1355		}
1356	}
1357	// Undo scaling if necessary.
1358	if iscl {
1359		if anrm > bignum {
1360			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn)
1361		}
1362		if !ok && anrm > bignum {
1363			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn)
1364		}
1365		if anrm < smlnum {
1366			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn)
1367		}
1368		if !ok && anrm < smlnum {
1369			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn)
1370		}
1371	}
1372	work[0] = float64(maxwrk)
1373	return ok
1374}
1375