1 /*
2 * $Id: dgesv2.c,v 1.1 2005-09-18 22:04:46 dhmunro Exp $
3 * LAPACK routine to return the SVD of a matrix, m<n branch (see dgesvd.c).
4 */
5 /* Copyright (c) 2005, The Regents of the University of California.
6 * All rights reserved.
7 * This file is part of yorick (http://yorick.sourceforge.net).
8 * Read the accompanying LICENSE file for details.
9 */
10
11 #include "dg.h"
12
13 extern void dgesv2( char jobu, char jobvt, long m, long n,
14 double a[], long lda, double s[], double u[], long ldu,
15 double vt[], long ldvt,
16 double work[], long lwork, long *info );
17
18 /*---blas routines---*/
19 /* dgemm */
20
21
22
23 /*---converted nutty string switches to single characters (lower case)---*/
24 #define lsame(x,y) ((x)==(y))
25
26
27 /*-----Fortran intrinsics converted-----*/
28 extern double sqrt(double);
29 #define min(x,y) ((x)<(y)?(x):(y))
30 #define max(x,y) ((x)>(y)?(x):(y))
31 /*-----end of Fortran intrinsics-----*/
32
33
34
dgesv2(char jobu,char jobvt,long m,long n,double a[],long lda,double s[],double u[],long ldu,double vt[],long ldvt,double work[],long lwork,long * info)35 void dgesv2( char jobu, char jobvt, long m, long n,
36 double a[], long lda, double s[], double u[], long ldu,
37 double vt[], long ldvt,
38 double work[], long lwork, long *info )
39 {
40 /**
41 * -- LAPACK driver routine (version 1.1) --
42 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
43 * Courant Institute, Argonne National Lab, and Rice University
44 * March 31, 1993
45 *
46 * This is m<n branch of original dgesvd routine (DHM).
47 *
48 * .. Scalar Arguments ..*/
49 /** ..
50 * .. Array Arguments ..*/
51 #undef work_1
52 #define work_1(a1) work[a1-1]
53 #undef vt_2
54 #define vt_2(a1,a2) vt[a1-1+ldvt*(a2-1)]
55 #undef u_2
56 #define u_2(a1,a2) u[a1-1+ldu*(a2-1)]
57 #undef s_1
58 #define s_1(a1) s[a1-1]
59 #undef a_2
60 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
61 /* .. Parameters ..*/
62 #undef zero
63 #define zero 0.0e0
64 #undef one
65 #define one 1.0e0
66 /** ..
67 * .. Local Scalars ..*/
68 int wntua, wntuas, wntun, wntuo, wntus, wntva,
69 wntvas, wntvn, wntvo, wntvs;
70 long blk, chunk, i, ie=0, ierr, ir, iscl, itau, itaup,
71 itauq, iu, iwork, ldwrkr, ldwrku, maxwrk=0,
72 minmn, minwrk, mnthr, ncvt=0, nru=0, nrvt=0,
73 wrkbl=0;
74 double anrm, bignum, eps, smlnum;
75 char job_u_vt[3];
76 /** ..
77 * .. Local Arrays ..*/
78 double dum[1];
79 #undef dum_1
80 #define dum_1(a1) [a1-1]
81 /** ..
82 * .. Intrinsic Functions ..*/
83 /* intrinsic max, min, sqrt;*/
84 /** ..
85 * .. Executable Statements ..
86 *
87 * Test the input arguments
88 **/
89 /*-----implicit-declarations-----*/
90 /*-----end-of-declarations-----*/
91 *info = 0;
92 minmn = min( m, n );
93 job_u_vt[0]= jobu;
94 job_u_vt[1]= jobvt;
95 job_u_vt[2]= '\0';
96 mnthr = ilaenv( 6, "dgesvd", job_u_vt, m, n, 0, 0 );
97 wntua = lsame( jobu, 'a' );
98 wntus = lsame( jobu, 's' );
99 wntuas = wntua || wntus;
100 wntuo = lsame( jobu, 'o' );
101 wntun = lsame( jobu, 'n' );
102 wntva = lsame( jobvt, 'a' );
103 wntvs = lsame( jobvt, 's' );
104 wntvas = wntva || wntvs;
105 wntvo = lsame( jobvt, 'o' );
106 wntvn = lsame( jobvt, 'n' );
107 minwrk = 1;
108
109 if( !( wntua || wntus || wntuo || wntun ) ) {
110 *info = -1;
111 } else if( !( wntva || wntvs || wntvo || wntvn ) ||
112 ( wntvo && wntuo ) ) {
113 *info = -2;
114 } else if( m<0 ) {
115 *info = -3;
116 } else if( n<0 ) {
117 *info = -4;
118 } else if( lda<max( 1, m ) ) {
119 *info = -6;
120 } else if( ldu<1 || ( wntuas && ldu<m ) ) {
121 *info = -9;
122 } else if( ldvt<1 || ( wntva && ldvt<n ) ||
123 ( wntvs && ldvt<minmn ) ) {
124 *info = -11;
125 }
126 /**
127 * Compute workspace
128 * (Note: Comments in the code beginning "Workspace:" describe the
129 * minimal amount of workspace needed at that point in the code,
130 * as well as the preferred amount for good performance.
131 * NB refers to the optimal block size for the immediately
132 * following subroutine, as returned by ILAENV.)
133 **/
134 if( *info==0 && lwork>=1 && m>0 && n>0 ) {
135 if( n>=mnthr ) {
136 if( wntvn ) {
137 /**
138 * Path 1t(N much larger than M, JOBVT='N')
139 **/
140 maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1,
141 -1 );
142 maxwrk = max( maxwrk, 3*m+2*m*
143 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
144 if( wntuo || wntuas )
145 maxwrk = max( maxwrk, 3*m+m*
146 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
147 maxwrk = max( maxwrk, 5*m-4 );
148 minwrk = max( 4*m, 5*m-4 );
149 } else if( wntvo && wntun ) {
150 /**
151 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
152 **/
153 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
154 wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
155 n, m, -1 ) );
156 wrkbl = max( wrkbl, 3*m+2*m*
157 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
158 wrkbl = max( wrkbl, 3*m+( m-1 )*
159 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
160 wrkbl = max( wrkbl, 5*m-4 );
161 maxwrk = max( m*m+wrkbl, m*m+m*n+m );
162 minwrk = max( 3*m+n, 5*m-4 );
163 minwrk = min( minwrk, maxwrk );
164 } else if( wntvo && wntuas ) {
165 /**
166 * Path 3t(N much larger than M, JOBU='S' or 'A',
167 * JOBVT='O')
168 **/
169 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
170 wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
171 n, m, -1 ) );
172 wrkbl = max( wrkbl, 3*m+2*m*
173 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
174 wrkbl = max( wrkbl, 3*m+( m-1 )*
175 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
176 wrkbl = max( wrkbl, 3*m+m*
177 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
178 wrkbl = max( wrkbl, 5*m-4 );
179 maxwrk = max( m*m+wrkbl, m*m+m*n+m );
180 minwrk = max( 3*m+n, 5*m-4 );
181 minwrk = min( minwrk, maxwrk );
182 } else if( wntvs && wntun ) {
183 /**
184 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
185 **/
186 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
187 wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
188 n, m, -1 ) );
189 wrkbl = max( wrkbl, 3*m+2*m*
190 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
191 wrkbl = max( wrkbl, 3*m+( m-1 )*
192 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
193 wrkbl = max( wrkbl, 5*m-4 );
194 maxwrk = m*m + wrkbl;
195 minwrk = max( 3*m+n, 5*m-4 );
196 minwrk = min( minwrk, maxwrk );
197 } else if( wntvs && wntuo ) {
198 /**
199 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
200 **/
201 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
202 wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
203 n, m, -1 ) );
204 wrkbl = max( wrkbl, 3*m+2*m*
205 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
206 wrkbl = max( wrkbl, 3*m+( m-1 )*
207 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
208 wrkbl = max( wrkbl, 3*m+m*
209 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
210 wrkbl = max( wrkbl, 5*m-4 );
211 maxwrk = 2*m*m + wrkbl;
212 minwrk = max( 3*m+n, 5*m-4 );
213 minwrk = min( minwrk, maxwrk );
214 } else if( wntvs && wntuas ) {
215 /**
216 * Path 6t(N much larger than M, JOBU='S' or 'A',
217 * JOBVT='S')
218 **/
219 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
220 wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
221 n, m, -1 ) );
222 wrkbl = max( wrkbl, 3*m+2*m*
223 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
224 wrkbl = max( wrkbl, 3*m+( m-1 )*
225 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
226 wrkbl = max( wrkbl, 3*m+m*
227 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
228 wrkbl = max( wrkbl, 5*m-4 );
229 maxwrk = m*m + wrkbl;
230 minwrk = max( 3*m+n, 5*m-4 );
231 minwrk = min( minwrk, maxwrk );
232 } else if( wntva && wntun ) {
233 /**
234 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
235 **/
236 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
237 wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
238 n, m, -1 ) );
239 wrkbl = max( wrkbl, 3*m+2*m*
240 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
241 wrkbl = max( wrkbl, 3*m+( m-1 )*
242 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
243 wrkbl = max( wrkbl, 5*m-4 );
244 maxwrk = m*m + wrkbl;
245 minwrk = max( 3*m+n, 5*m-4 );
246 minwrk = min( minwrk, maxwrk );
247 } else if( wntva && wntuo ) {
248 /**
249 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
250 **/
251 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
252 wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
253 n, m, -1 ) );
254 wrkbl = max( wrkbl, 3*m+2*m*
255 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
256 wrkbl = max( wrkbl, 3*m+( m-1 )*
257 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
258 wrkbl = max( wrkbl, 3*m+m*
259 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
260 wrkbl = max( wrkbl, 5*m-4 );
261 maxwrk = 2*m*m + wrkbl;
262 minwrk = max( 3*m+n, 5*m-4 );
263 minwrk = min( minwrk, maxwrk );
264 } else if( wntva && wntuas ) {
265 /**
266 * Path 9t(N much larger than M, JOBU='S' or 'A',
267 * JOBVT='A')
268 **/
269 wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
270 wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
271 n, m, -1 ) );
272 wrkbl = max( wrkbl, 3*m+2*m*
273 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
274 wrkbl = max( wrkbl, 3*m+( m-1 )*
275 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
276 wrkbl = max( wrkbl, 3*m+m*
277 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
278 wrkbl = max( wrkbl, 5*m-4 );
279 maxwrk = m*m + wrkbl;
280 minwrk = max( 3*m+n, 5*m-4 );
281 minwrk = min( minwrk, maxwrk );
282 }
283 } else {
284 /**
285 * Path 10t(N greater than M, but not much larger)
286 **/
287 maxwrk = 3*m + ( m+n )*ilaenv( 1, "dgebrd", " ", m, n,
288 -1, -1 );
289 if( wntvs || wntvo )
290 maxwrk = max( maxwrk, 3*m+m*
291 ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) );
292 if( wntva )
293 maxwrk = max( maxwrk, 3*m+n*
294 ilaenv( 1, "dorgbr", "p", n, n, m, -1 ) );
295 if( !wntun )
296 maxwrk = max( maxwrk, 3*m+( m-1 )*
297 ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
298 maxwrk = max( maxwrk, 5*m-4 );
299 minwrk = max( 3*m+n, 5*m-4 );
300 }
301 work_1( 1 ) = maxwrk;
302 }
303
304 if( lwork<minwrk ) {
305 *info = -13;
306 }
307 if( *info!=0 ) {
308 xerbla( "dgesvd", -*info );
309 return;
310 }
311 /**
312 * Quick return if possible
313 **/
314 if( m==0 || n==0 ) {
315 if( lwork>=1 )
316 work_1( 1 ) = one;
317 return;
318 }
319 /**
320 * Get machine constants
321 **/
322 eps = dlamch( 'p' );
323 smlnum = sqrt( dlamch( 's' ) ) / eps;
324 bignum = one / smlnum;
325 /**
326 * Scale A if max entry outside range [SMLNUM,BIGNUM]
327 **/
328 anrm = dlange( 'm', m, n, a, lda, dum );
329 iscl = 0;
330 if( anrm>zero && anrm<smlnum ) {
331 iscl = 1;
332 dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, &ierr );
333 } else if( anrm>bignum ) {
334 iscl = 1;
335 dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, &ierr );
336 }
337
338 /**
339 * A has more columns than rows. If A has sufficiently more
340 * columns than rows, first reduce using the LQ decomposition (if
341 * sufficient workspace available)
342 **/
343 if( n>=mnthr ) {
344
345 if( wntvn ) {
346 /**
347 * Path 1t(N much larger than M, JOBVT='N')
348 * No right singular vectors to be computed
349 **/
350 itau = 1;
351 iwork = itau + m;
352 /**
353 * Compute A=L*Q
354 * (Workspace: need 2*M, prefer M+M*NB)
355 **/
356 dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
357 lwork-iwork+1, &ierr );
358 /**
359 * Zero out above L
360 **/
361 dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda );
362 ie = 1;
363 itauq = ie + m;
364 itaup = itauq + m;
365 iwork = itaup + m;
366 /**
367 * Bidiagonalize L in A
368 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
369 **/
370 dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ),
371 &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
372 &ierr );
373 if( wntuo || wntuas ) {
374 /**
375 * If left singular vectors desired, generate Q
376 * (Workspace: need 4*M, prefer 3*M+M*NB)
377 **/
378 dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
379 &work_1( iwork ), lwork-iwork+1, &ierr );
380 }
381 iwork = ie + m;
382 nru = 0;
383 if( wntuo || wntuas )
384 nru = m;
385 /**
386 * Perform bidiagonal QR iteration, computing left singular
387 * vectors of A in A if desired
388 * (Workspace: need 5*M-4)
389 **/
390 dbdsqr( 'u', m, 0, nru, 0, s, &work_1( ie ), dum, 1, a,
391 lda, dum, 1, &work_1( iwork ), info );
392 /**
393 * If left singular vectors desired in U, copy them there
394 **/
395 if( wntuas )
396 dlacpy( 'f', m, m, a, lda, u, ldu );
397
398 } else if( wntvo && wntun ) {
399 /**
400 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
401 * M right singular vectors to be overwritten on A and
402 * no left singular vectors to be computed
403 **/
404 if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
405 /**
406 * Sufficient workspace for a fast algorithm
407 **/
408 ir = 1;
409 if( lwork>=max( wrkbl, lda*n+m )+lda*m ) {
410 /**
411 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
412 **/
413 ldwrku = lda;
414 chunk = n;
415 ldwrkr = lda;
416 } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) {
417 /**
418 * WORK(IU) is LDA by N and WORK(IR) is M by M
419 **/
420 ldwrku = lda;
421 chunk = n;
422 ldwrkr = m;
423 } else {
424 /**
425 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
426 **/
427 ldwrku = m;
428 chunk = ( lwork-m*m-m ) / m;
429 ldwrkr = m;
430 }
431 itau = ir + ldwrkr*m;
432 iwork = itau + m;
433 /**
434 * Compute A=L*Q
435 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
436 **/
437 dgelqf( m, n, a, lda, &work_1( itau ),
438 &work_1( iwork ), lwork-iwork+1, &ierr );
439 /**
440 * Copy L to WORK(IR) and zero out above it
441 **/
442 dlacpy( 'l', m, m, a, lda, &work_1( ir ), ldwrkr );
443 dlaset( 'u', m-1, m-1, zero, zero,
444 &work_1( ir+ldwrkr ), ldwrkr );
445 /**
446 * Generate Q in A
447 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
448 **/
449 dorglq( m, n, m, a, lda, &work_1( itau ),
450 &work_1( iwork ), lwork-iwork+1, &ierr );
451 ie = itau;
452 itauq = ie + m;
453 itaup = itauq + m;
454 iwork = itaup + m;
455 /**
456 * Bidiagonalize L in WORK(IR)
457 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
458 **/
459 dgebrd( m, m, &work_1( ir ), ldwrkr, s, &work_1( ie ),
460 &work_1( itauq ), &work_1( itaup ),
461 &work_1( iwork ), lwork-iwork+1, &ierr );
462 /**
463 * Generate right vectors bidiagonalizing L
464 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
465 **/
466 dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
467 &work_1( itaup ), &work_1( iwork ),
468 lwork-iwork+1, &ierr );
469 iwork = ie + m;
470 /**
471 * Perform bidiagonal QR iteration, computing right
472 * singular vectors of L in WORK(IR)
473 * (Workspace: need M*M+5*M-4)
474 **/
475 dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
476 &work_1( ir ), ldwrkr, dum, 1, dum, 1,
477 &work_1( iwork ), info );
478 iu = ie + m;
479 /**
480 * Multiply right singular vectors of L in WORK(IR) by Q
481 * in A, storing result in WORK(IU) and copying to A
482 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
483 **/
484 for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) {
485 blk = min( n-i+1, chunk );
486 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m,
487 one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero,
488 &work_1( iu ), ldwrku );
489 dlacpy( 'f', m, blk, &work_1( iu ), ldwrku,
490 &a_2( 1, i ), lda );
491 }
492
493 } else {
494 /**
495 * Insufficient workspace for a fast algorithm
496 **/
497 ie = 1;
498 itauq = ie + m;
499 itaup = itauq + m;
500 iwork = itaup + m;
501 /**
502 * Bidiagonalize A
503 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
504 **/
505 dgebrd( m, n, a, lda, s, &work_1( ie ),
506 &work_1( itauq ), &work_1( itaup ),
507 &work_1( iwork ), lwork-iwork+1, &ierr );
508 /**
509 * Generate right vectors bidiagonalizing A
510 * (Workspace: need 4*M, prefer 3*M+M*NB)
511 **/
512 dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
513 &work_1( iwork ), lwork-iwork+1, &ierr );
514 iwork = ie + m;
515 /**
516 * Perform bidiagonal QR iteration, computing right
517 * singular vectors of A in A
518 * (Workspace: need 5*M-4)
519 **/
520 dbdsqr( 'l', m, n, 0, 0, s, &work_1( ie ), a, lda,
521 dum, 1, dum, 1, &work_1( iwork ), info );
522
523 }
524
525 } else if( wntvo && wntuas ) {
526 /**
527 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
528 * M right singular vectors to be overwritten on A and
529 * M left singular vectors to be computed in U
530 **/
531 if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
532 /**
533 * Sufficient workspace for a fast algorithm
534 **/
535 ir = 1;
536 if( lwork>=max( wrkbl, lda*n+m )+lda*m ) {
537 /**
538 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
539 **/
540 ldwrku = lda;
541 chunk = n;
542 ldwrkr = lda;
543 } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) {
544 /**
545 * WORK(IU) is LDA by N and WORK(IR) is M by M
546 **/
547 ldwrku = lda;
548 chunk = n;
549 ldwrkr = m;
550 } else {
551 /**
552 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
553 **/
554 ldwrku = m;
555 chunk = ( lwork-m*m-m ) / m;
556 ldwrkr = m;
557 }
558 itau = ir + ldwrkr*m;
559 iwork = itau + m;
560 /**
561 * Compute A=L*Q
562 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
563 **/
564 dgelqf( m, n, a, lda, &work_1( itau ),
565 &work_1( iwork ), lwork-iwork+1, &ierr );
566 /**
567 * Copy L to U, zeroing about above it
568 **/
569 dlacpy( 'l', m, m, a, lda, u, ldu );
570 dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
571 ldu );
572 /**
573 * Generate Q in A
574 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
575 **/
576 dorglq( m, n, m, a, lda, &work_1( itau ),
577 &work_1( iwork ), lwork-iwork+1, &ierr );
578 ie = itau;
579 itauq = ie + m;
580 itaup = itauq + m;
581 iwork = itaup + m;
582 /**
583 * Bidiagonalize L in U, copying result to WORK(IR)
584 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
585 **/
586 dgebrd( m, m, u, ldu, s, &work_1( ie ),
587 &work_1( itauq ), &work_1( itaup ),
588 &work_1( iwork ), lwork-iwork+1, &ierr );
589 dlacpy( 'u', m, m, u, ldu, &work_1( ir ), ldwrkr );
590 /**
591 * Generate right vectors bidiagonalizing L in WORK(IR)
592 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
593 **/
594 dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
595 &work_1( itaup ), &work_1( iwork ),
596 lwork-iwork+1, &ierr );
597 /**
598 * Generate left vectors bidiagonalizing L in U
599 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
600 **/
601 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
602 &work_1( iwork ), lwork-iwork+1, &ierr );
603 iwork = ie + m;
604 /**
605 * Perform bidiagonal QR iteration, computing left
606 * singular vectors of L in U, and computing right
607 * singular vectors of L in WORK(IR)
608 * (Workspace: need M*M+5*M-4)
609 **/
610 dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
611 &work_1( ir ), ldwrkr, u, ldu, dum, 1,
612 &work_1( iwork ), info );
613 iu = ie + m;
614 /**
615 * Multiply right singular vectors of L in WORK(IR) by Q
616 * in A, storing result in WORK(IU) and copying to A
617 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
618 **/
619 for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) {
620 blk = min( n-i+1, chunk );
621 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m,
622 one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero,
623 &work_1( iu ), ldwrku );
624 dlacpy( 'f', m, blk, &work_1( iu ), ldwrku,
625 &a_2( 1, i ), lda );
626 }
627
628 } else {
629 /**
630 * Insufficient workspace for a fast algorithm
631 **/
632 itau = 1;
633 iwork = itau + m;
634 /**
635 * Compute A=L*Q
636 * (Workspace: need 2*M, prefer M+M*NB)
637 **/
638 dgelqf( m, n, a, lda, &work_1( itau ),
639 &work_1( iwork ), lwork-iwork+1, &ierr );
640 /**
641 * Copy L to U, zeroing out above it
642 **/
643 dlacpy( 'l', m, m, a, lda, u, ldu );
644 dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
645 ldu );
646 /**
647 * Generate Q in A
648 * (Workspace: need 2*M, prefer M+M*NB)
649 **/
650 dorglq( m, n, m, a, lda, &work_1( itau ),
651 &work_1( iwork ), lwork-iwork+1, &ierr );
652 ie = itau;
653 itauq = ie + m;
654 itaup = itauq + m;
655 iwork = itaup + m;
656 /**
657 * Bidiagonalize L in U
658 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
659 **/
660 dgebrd( m, m, u, ldu, s, &work_1( ie ),
661 &work_1( itauq ), &work_1( itaup ),
662 &work_1( iwork ), lwork-iwork+1, &ierr );
663 /**
664 * Multiply right vectors bidiagonalizing L by Q in A
665 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
666 **/
667 dormbr( 'p', 'l', 't', m, n, m, u, ldu,
668 &work_1( itaup ), a, lda, &work_1( iwork ),
669 lwork-iwork+1, &ierr );
670 /**
671 * Generate left vectors bidiagonalizing L in U
672 * (Workspace: need 4*M, prefer 3*M+M*NB)
673 **/
674 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
675 &work_1( iwork ), lwork-iwork+1, &ierr );
676 iwork = ie + m;
677 /**
678 * Perform bidiagonal QR iteration, computing left
679 * singular vectors of A in U and computing right
680 * singular vectors of A in A
681 * (Workspace: need 5*M-4)
682 **/
683 dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), a, lda,
684 u, ldu, dum, 1, &work_1( iwork ), info );
685
686 }
687
688 } else if( wntvs ) {
689
690 if( wntun ) {
691 /**
692 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
693 * M right singular vectors to be computed in VT and
694 * no left singular vectors to be computed
695 **/
696 if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
697 /**
698 * Sufficient workspace for a fast algorithm
699 **/
700 ir = 1;
701 if( lwork>=wrkbl+lda*m ) {
702 /**
703 * WORK(IR) is LDA by M
704 **/
705 ldwrkr = lda;
706 } else {
707 /**
708 * WORK(IR) is M by M
709 **/
710 ldwrkr = m;
711 }
712 itau = ir + ldwrkr*m;
713 iwork = itau + m;
714 /**
715 * Compute A=L*Q
716 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
717 **/
718 dgelqf( m, n, a, lda, &work_1( itau ),
719 &work_1( iwork ), lwork-iwork+1, &ierr );
720 /**
721 * Copy L to WORK(IR), zeroing out above it
722 **/
723 dlacpy( 'l', m, m, a, lda, &work_1( ir ),
724 ldwrkr );
725 dlaset( 'u', m-1, m-1, zero, zero,
726 &work_1( ir+ldwrkr ), ldwrkr );
727 /**
728 * Generate Q in A
729 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
730 **/
731 dorglq( m, n, m, a, lda, &work_1( itau ),
732 &work_1( iwork ), lwork-iwork+1, &ierr );
733 ie = itau;
734 itauq = ie + m;
735 itaup = itauq + m;
736 iwork = itaup + m;
737 /**
738 * Bidiagonalize L in WORK(IR)
739 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
740 **/
741 dgebrd( m, m, &work_1( ir ), ldwrkr, s,
742 &work_1( ie ), &work_1( itauq ),
743 &work_1( itaup ), &work_1( iwork ),
744 lwork-iwork+1, &ierr );
745 /**
746 * Generate right vectors bidiagonalizing L in
747 * WORK(IR)
748 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
749 **/
750 dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
751 &work_1( itaup ), &work_1( iwork ),
752 lwork-iwork+1, &ierr );
753 iwork = ie + m;
754 /**
755 * Perform bidiagonal QR iteration, computing right
756 * singular vectors of L in WORK(IR)
757 * (Workspace: need M*M+5*M-4)
758 **/
759 dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
760 &work_1( ir ), ldwrkr, dum, 1, dum, 1,
761 &work_1( iwork ), info );
762 /**
763 * Multiply right singular vectors of L in WORK(IR) by
764 * Q in A, storing result in VT
765 * (Workspace: need M*M)
766 **/
767 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
768 one, &work_1( ir ), ldwrkr, a, lda, zero, vt, ldvt );
769
770 } else {
771 /**
772 * Insufficient workspace for a fast algorithm
773 **/
774 itau = 1;
775 iwork = itau + m;
776 /**
777 * Compute A=L*Q
778 * (Workspace: need 2*M, prefer M+M*NB)
779 **/
780 dgelqf( m, n, a, lda, &work_1( itau ),
781 &work_1( iwork ), lwork-iwork+1, &ierr );
782 /**
783 * Copy result to VT
784 **/
785 dlacpy( 'u', m, n, a, lda, vt, ldvt );
786 /**
787 * Generate Q in VT
788 * (Workspace: need 2*M, prefer M+M*NB)
789 **/
790 dorglq( m, n, m, vt, ldvt, &work_1( itau ),
791 &work_1( iwork ), lwork-iwork+1, &ierr );
792 ie = itau;
793 itauq = ie + m;
794 itaup = itauq + m;
795 iwork = itaup + m;
796 /**
797 * Zero out above L in A
798 **/
799 dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
800 lda );
801 /**
802 * Bidiagonalize L in A
803 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
804 **/
805 dgebrd( m, m, a, lda, s, &work_1( ie ),
806 &work_1( itauq ), &work_1( itaup ),
807 &work_1( iwork ), lwork-iwork+1, &ierr );
808 /**
809 * Multiply right vectors bidiagonalizing L by Q in VT
810 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
811 **/
812 dormbr( 'p', 'l', 't', m, n, m, a, lda,
813 &work_1( itaup ), vt, ldvt,
814 &work_1( iwork ), lwork-iwork+1, &ierr );
815 iwork = ie + m;
816 /**
817 * Perform bidiagonal QR iteration, computing right
818 * singular vectors of A in VT
819 * (Workspace: need 5*M-4)
820 **/
821 dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt,
822 ldvt, dum, 1, dum, 1, &work_1( iwork ),
823 info );
824
825 }
826
827 } else if( wntuo ) {
828 /**
829 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
830 * M right singular vectors to be computed in VT and
831 * M left singular vectors to be overwritten on A
832 **/
833 if( lwork>=2*m*m+max( 4*m, 5*m-4 ) ) {
834 /**
835 * Sufficient workspace for a fast algorithm
836 **/
837 iu = 1;
838 if( lwork>=wrkbl+2*lda*m ) {
839 /**
840 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
841 **/
842 ldwrku = lda;
843 ir = iu + ldwrku*m;
844 ldwrkr = lda;
845 } else if( lwork>=wrkbl+( lda+m )*m ) {
846 /**
847 * WORK(IU) is LDA by M and WORK(IR) is M by M
848 **/
849 ldwrku = lda;
850 ir = iu + ldwrku*m;
851 ldwrkr = m;
852 } else {
853 /**
854 * WORK(IU) is M by M and WORK(IR) is M by M
855 **/
856 ldwrku = m;
857 ir = iu + ldwrku*m;
858 ldwrkr = m;
859 }
860 itau = ir + ldwrkr*m;
861 iwork = itau + m;
862 /**
863 * Compute A=L*Q
864 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
865 **/
866 dgelqf( m, n, a, lda, &work_1( itau ),
867 &work_1( iwork ), lwork-iwork+1, &ierr );
868 /**
869 * Copy L to WORK(IU), zeroing out below it
870 **/
871 dlacpy( 'l', m, m, a, lda, &work_1( iu ),
872 ldwrku );
873 dlaset( 'u', m-1, m-1, zero, zero,
874 &work_1( iu+ldwrku ), ldwrku );
875 /**
876 * Generate Q in A
877 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
878 **/
879 dorglq( m, n, m, a, lda, &work_1( itau ),
880 &work_1( iwork ), lwork-iwork+1, &ierr );
881 ie = itau;
882 itauq = ie + m;
883 itaup = itauq + m;
884 iwork = itaup + m;
885 /**
886 * Bidiagonalize L in WORK(IU), copying result to
887 * WORK(IR)
888 * (Workspace: need 2*M*M+4*M,
889 * prefer 2*M*M+3*M+2*M*NB)
890 **/
891 dgebrd( m, m, &work_1( iu ), ldwrku, s,
892 &work_1( ie ), &work_1( itauq ),
893 &work_1( itaup ), &work_1( iwork ),
894 lwork-iwork+1, &ierr );
895 dlacpy( 'l', m, m, &work_1( iu ), ldwrku,
896 &work_1( ir ), ldwrkr );
897 /**
898 * Generate right bidiagonalizing vectors in WORK(IU)
899 * (Workspace: need 2*M*M+4*M-1,
900 * prefer 2*M*M+3*M+(M-1)*NB)
901 **/
902 dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
903 &work_1( itaup ), &work_1( iwork ),
904 lwork-iwork+1, &ierr );
905 /**
906 * Generate left bidiagonalizing vectors in WORK(IR)
907 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
908 **/
909 dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr,
910 &work_1( itauq ), &work_1( iwork ),
911 lwork-iwork+1, &ierr );
912 iwork = ie + m;
913 /**
914 * Perform bidiagonal QR iteration, computing left
915 * singular vectors of L in WORK(IR) and computing
916 * right singular vectors of L in WORK(IU)
917 * (Workspace: need 2*M*M+5*M-4)
918 **/
919 dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
920 &work_1( iu ), ldwrku, &work_1( ir ),
921 ldwrkr, dum, 1, &work_1( iwork ), info );
922 /**
923 * Multiply right singular vectors of L in WORK(IU) by
924 * Q in A, storing result in VT
925 * (Workspace: need M*M)
926 **/
927 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
928 one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt );
929 /**
930 * Copy left singular vectors of L to A
931 * (Workspace: need M*M)
932 **/
933 dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a,
934 lda );
935
936 } else {
937 /**
938 * Insufficient workspace for a fast algorithm
939 **/
940 itau = 1;
941 iwork = itau + m;
942 /**
943 * Compute A=L*Q, copying result to VT
944 * (Workspace: need 2*M, prefer M+M*NB)
945 **/
946 dgelqf( m, n, a, lda, &work_1( itau ),
947 &work_1( iwork ), lwork-iwork+1, &ierr );
948 dlacpy( 'u', m, n, a, lda, vt, ldvt );
949 /**
950 * Generate Q in VT
951 * (Workspace: need 2*M, prefer M+M*NB)
952 **/
953 dorglq( m, n, m, vt, ldvt, &work_1( itau ),
954 &work_1( iwork ), lwork-iwork+1, &ierr );
955 ie = itau;
956 itauq = ie + m;
957 itaup = itauq + m;
958 iwork = itaup + m;
959 /**
960 * Zero out above L in A
961 **/
962 dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
963 lda );
964 /**
965 * Bidiagonalize L in A
966 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
967 **/
968 dgebrd( m, m, a, lda, s, &work_1( ie ),
969 &work_1( itauq ), &work_1( itaup ),
970 &work_1( iwork ), lwork-iwork+1, &ierr );
971 /**
972 * Multiply right vectors bidiagonalizing L by Q in VT
973 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
974 **/
975 dormbr( 'p', 'l', 't', m, n, m, a, lda,
976 &work_1( itaup ), vt, ldvt,
977 &work_1( iwork ), lwork-iwork+1, &ierr );
978 /**
979 * Generate left bidiagonalizing vectors of L in A
980 * (Workspace: need 4*M, prefer 3*M+M*NB)
981 **/
982 dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
983 &work_1( iwork ), lwork-iwork+1, &ierr );
984 iwork = ie + m;
985 /**
986 * Perform bidiagonal QR iteration, compute left
987 * singular vectors of A in A and compute right
988 * singular vectors of A in VT
989 * (Workspace: need 5*M-4)
990 **/
991 dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
992 ldvt, a, lda, dum, 1, &work_1( iwork ),
993 info );
994
995 }
996
997 } else if( wntuas ) {
998 /**
999 * Path 6t(N much larger than M, JOBU='S' or 'A',
1000 * JOBVT='S')
1001 * M right singular vectors to be computed in VT and
1002 * M left singular vectors to be computed in U
1003 **/
1004 if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
1005 /**
1006 * Sufficient workspace for a fast algorithm
1007 **/
1008 iu = 1;
1009 if( lwork>=wrkbl+lda*m ) {
1010 /**
1011 * WORK(IU) is LDA by N
1012 **/
1013 ldwrku = lda;
1014 } else {
1015 /**
1016 * WORK(IU) is LDA by M
1017 **/
1018 ldwrku = m;
1019 }
1020 itau = iu + ldwrku*m;
1021 iwork = itau + m;
1022 /**
1023 * Compute A=L*Q
1024 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1025 **/
1026 dgelqf( m, n, a, lda, &work_1( itau ),
1027 &work_1( iwork ), lwork-iwork+1, &ierr );
1028 /**
1029 * Copy L to WORK(IU), zeroing out above it
1030 **/
1031 dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1032 ldwrku );
1033 dlaset( 'u', m-1, m-1, zero, zero,
1034 &work_1( iu+ldwrku ), ldwrku );
1035 /**
1036 * Generate Q in A
1037 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1038 **/
1039 dorglq( m, n, m, a, lda, &work_1( itau ),
1040 &work_1( iwork ), lwork-iwork+1, &ierr );
1041 ie = itau;
1042 itauq = ie + m;
1043 itaup = itauq + m;
1044 iwork = itaup + m;
1045 /**
1046 * Bidiagonalize L in WORK(IU), copying result to U
1047 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1048 **/
1049 dgebrd( m, m, &work_1( iu ), ldwrku, s,
1050 &work_1( ie ), &work_1( itauq ),
1051 &work_1( itaup ), &work_1( iwork ),
1052 lwork-iwork+1, &ierr );
1053 dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u,
1054 ldu );
1055 /**
1056 * Generate right bidiagonalizing vectors in WORK(IU)
1057 * (Workspace: need M*M+4*M-1,
1058 * prefer M*M+3*M+(M-1)*NB)
1059 **/
1060 dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1061 &work_1( itaup ), &work_1( iwork ),
1062 lwork-iwork+1, &ierr );
1063 /**
1064 * Generate left bidiagonalizing vectors in U
1065 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
1066 **/
1067 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1068 &work_1( iwork ), lwork-iwork+1, &ierr );
1069 iwork = ie + m;
1070 /**
1071 * Perform bidiagonal QR iteration, computing left
1072 * singular vectors of L in U and computing right
1073 * singular vectors of L in WORK(IU)
1074 * (Workspace: need M*M+5*M-4)
1075 **/
1076 dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1077 &work_1( iu ), ldwrku, u, ldu, dum, 1,
1078 &work_1( iwork ), info );
1079 /**
1080 * Multiply right singular vectors of L in WORK(IU) by
1081 * Q in A, storing result in VT
1082 * (Workspace: need M*M)
1083 **/
1084 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1085 one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt );
1086
1087 } else {
1088 /**
1089 * Insufficient workspace for a fast algorithm
1090 **/
1091 itau = 1;
1092 iwork = itau + m;
1093 /**
1094 * Compute A=L*Q, copying result to VT
1095 * (Workspace: need 2*M, prefer M+M*NB)
1096 **/
1097 dgelqf( m, n, a, lda, &work_1( itau ),
1098 &work_1( iwork ), lwork-iwork+1, &ierr );
1099 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1100 /**
1101 * Generate Q in VT
1102 * (Workspace: need 2*M, prefer M+M*NB)
1103 **/
1104 dorglq( m, n, m, vt, ldvt, &work_1( itau ),
1105 &work_1( iwork ), lwork-iwork+1, &ierr );
1106 /**
1107 * Copy L to U, zeroing out above it
1108 **/
1109 dlacpy( 'l', m, m, a, lda, u, ldu );
1110 dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
1111 ldu );
1112 ie = itau;
1113 itauq = ie + m;
1114 itaup = itauq + m;
1115 iwork = itaup + m;
1116 /**
1117 * Bidiagonalize L in U
1118 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
1119 **/
1120 dgebrd( m, m, u, ldu, s, &work_1( ie ),
1121 &work_1( itauq ), &work_1( itaup ),
1122 &work_1( iwork ), lwork-iwork+1, &ierr );
1123 /**
1124 * Multiply right bidiagonalizing vectors in U by Q
1125 * in VT
1126 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
1127 **/
1128 dormbr( 'p', 'l', 't', m, n, m, u, ldu,
1129 &work_1( itaup ), vt, ldvt,
1130 &work_1( iwork ), lwork-iwork+1, &ierr );
1131 /**
1132 * Generate left bidiagonalizing vectors in U
1133 * (Workspace: need 4*M, prefer 3*M+M*NB)
1134 **/
1135 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1136 &work_1( iwork ), lwork-iwork+1, &ierr );
1137 iwork = ie + m;
1138 /**
1139 * Perform bidiagonal QR iteration, computing left
1140 * singular vectors of A in U and computing right
1141 * singular vectors of A in VT
1142 * (Workspace: need 5*M-4)
1143 **/
1144 dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1145 ldvt, u, ldu, dum, 1, &work_1( iwork ),
1146 info );
1147
1148 }
1149
1150 }
1151
1152 } else if( wntva ) {
1153
1154 if( wntun ) {
1155 /**
1156 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
1157 * N right singular vectors to be computed in VT and
1158 * no left singular vectors to be computed
1159 **/
1160 if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1161 /**
1162 * Sufficient workspace for a fast algorithm
1163 **/
1164 ir = 1;
1165 if( lwork>=wrkbl+lda*m ) {
1166 /**
1167 * WORK(IR) is LDA by M
1168 **/
1169 ldwrkr = lda;
1170 } else {
1171 /**
1172 * WORK(IR) is M by M
1173 **/
1174 ldwrkr = m;
1175 }
1176 itau = ir + ldwrkr*m;
1177 iwork = itau + m;
1178 /**
1179 * Compute A=L*Q, copying result to VT
1180 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1181 **/
1182 dgelqf( m, n, a, lda, &work_1( itau ),
1183 &work_1( iwork ), lwork-iwork+1, &ierr );
1184 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1185 /**
1186 * Copy L to WORK(IR), zeroing out above it
1187 **/
1188 dlacpy( 'l', m, m, a, lda, &work_1( ir ),
1189 ldwrkr );
1190 dlaset( 'u', m-1, m-1, zero, zero,
1191 &work_1( ir+ldwrkr ), ldwrkr );
1192 /**
1193 * Generate Q in VT
1194 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
1195 **/
1196 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1197 &work_1( iwork ), lwork-iwork+1, &ierr );
1198 ie = itau;
1199 itauq = ie + m;
1200 itaup = itauq + m;
1201 iwork = itaup + m;
1202 /**
1203 * Bidiagonalize L in WORK(IR)
1204 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1205 **/
1206 dgebrd( m, m, &work_1( ir ), ldwrkr, s,
1207 &work_1( ie ), &work_1( itauq ),
1208 &work_1( itaup ), &work_1( iwork ),
1209 lwork-iwork+1, &ierr );
1210 /**
1211 * Generate right bidiagonalizing vectors in WORK(IR)
1212 * (Workspace: need M*M+4*M-1,
1213 * prefer M*M+3*M+(M-1)*NB)
1214 **/
1215 dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
1216 &work_1( itaup ), &work_1( iwork ),
1217 lwork-iwork+1, &ierr );
1218 iwork = ie + m;
1219 /**
1220 * Perform bidiagonal QR iteration, computing right
1221 * singular vectors of L in WORK(IR)
1222 * (Workspace: need M*M+5*M-4)
1223 **/
1224 dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
1225 &work_1( ir ), ldwrkr, dum, 1, dum, 1,
1226 &work_1( iwork ), info );
1227 /**
1228 * Multiply right singular vectors of L in WORK(IR) by
1229 * Q in VT, storing result in A
1230 * (Workspace: need M*M)
1231 **/
1232 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1233 one, &work_1( ir ), ldwrkr, vt, ldvt, zero, a, lda );
1234 /**
1235 * Copy right singular vectors of A from A to VT
1236 **/
1237 dlacpy( 'f', m, n, a, lda, vt, ldvt );
1238
1239 } else {
1240 /**
1241 * Insufficient workspace for a fast algorithm
1242 **/
1243 itau = 1;
1244 iwork = itau + m;
1245 /**
1246 * Compute A=L*Q, copying result to VT
1247 * (Workspace: need 2*M, prefer M+M*NB)
1248 **/
1249 dgelqf( m, n, a, lda, &work_1( itau ),
1250 &work_1( iwork ), lwork-iwork+1, &ierr );
1251 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1252 /**
1253 * Generate Q in VT
1254 * (Workspace: need M+N, prefer M+N*NB)
1255 **/
1256 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1257 &work_1( iwork ), lwork-iwork+1, &ierr );
1258 ie = itau;
1259 itauq = ie + m;
1260 itaup = itauq + m;
1261 iwork = itaup + m;
1262 /**
1263 * Zero out above L in A
1264 **/
1265 dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
1266 lda );
1267 /**
1268 * Bidiagonalize L in A
1269 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
1270 **/
1271 dgebrd( m, m, a, lda, s, &work_1( ie ),
1272 &work_1( itauq ), &work_1( itaup ),
1273 &work_1( iwork ), lwork-iwork+1, &ierr );
1274 /**
1275 * Multiply right bidiagonalizing vectors in A by Q
1276 * in VT
1277 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
1278 **/
1279 dormbr( 'p', 'l', 't', m, n, m, a, lda,
1280 &work_1( itaup ), vt, ldvt,
1281 &work_1( iwork ), lwork-iwork+1, &ierr );
1282 iwork = ie + m;
1283 /**
1284 * Perform bidiagonal QR iteration, computing right
1285 * singular vectors of A in VT
1286 * (Workspace: need 5*M-4)
1287 **/
1288 dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt,
1289 ldvt, dum, 1, dum, 1, &work_1( iwork ),
1290 info );
1291
1292 }
1293
1294 } else if( wntuo ) {
1295 /**
1296 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
1297 * N right singular vectors to be computed in VT and
1298 * M left singular vectors to be overwritten on A
1299 **/
1300 if( lwork>=2*m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1301 /**
1302 * Sufficient workspace for a fast algorithm
1303 **/
1304 iu = 1;
1305 if( lwork>=wrkbl+2*lda*m ) {
1306 /**
1307 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
1308 **/
1309 ldwrku = lda;
1310 ir = iu + ldwrku*m;
1311 ldwrkr = lda;
1312 } else if( lwork>=wrkbl+( lda+m )*m ) {
1313 /**
1314 * WORK(IU) is LDA by M and WORK(IR) is M by M
1315 **/
1316 ldwrku = lda;
1317 ir = iu + ldwrku*m;
1318 ldwrkr = m;
1319 } else {
1320 /**
1321 * WORK(IU) is M by M and WORK(IR) is M by M
1322 **/
1323 ldwrku = m;
1324 ir = iu + ldwrku*m;
1325 ldwrkr = m;
1326 }
1327 itau = ir + ldwrkr*m;
1328 iwork = itau + m;
1329 /**
1330 * Compute A=L*Q, copying result to VT
1331 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
1332 **/
1333 dgelqf( m, n, a, lda, &work_1( itau ),
1334 &work_1( iwork ), lwork-iwork+1, &ierr );
1335 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1336 /**
1337 * Generate Q in VT
1338 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
1339 **/
1340 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1341 &work_1( iwork ), lwork-iwork+1, &ierr );
1342 /**
1343 * Copy L to WORK(IU), zeroing out above it
1344 **/
1345 dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1346 ldwrku );
1347 dlaset( 'u', m-1, m-1, zero, zero,
1348 &work_1( iu+ldwrku ), ldwrku );
1349 ie = itau;
1350 itauq = ie + m;
1351 itaup = itauq + m;
1352 iwork = itaup + m;
1353 /**
1354 * Bidiagonalize L in WORK(IU), copying result to
1355 * WORK(IR)
1356 * (Workspace: need 2*M*M+4*M,
1357 * prefer 2*M*M+3*M+2*M*NB)
1358 **/
1359 dgebrd( m, m, &work_1( iu ), ldwrku, s,
1360 &work_1( ie ), &work_1( itauq ),
1361 &work_1( itaup ), &work_1( iwork ),
1362 lwork-iwork+1, &ierr );
1363 dlacpy( 'l', m, m, &work_1( iu ), ldwrku,
1364 &work_1( ir ), ldwrkr );
1365 /**
1366 * Generate right bidiagonalizing vectors in WORK(IU)
1367 * (Workspace: need 2*M*M+4*M-1,
1368 * prefer 2*M*M+3*M+(M-1)*NB)
1369 **/
1370 dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1371 &work_1( itaup ), &work_1( iwork ),
1372 lwork-iwork+1, &ierr );
1373 /**
1374 * Generate left bidiagonalizing vectors in WORK(IR)
1375 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
1376 **/
1377 dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr,
1378 &work_1( itauq ), &work_1( iwork ),
1379 lwork-iwork+1, &ierr );
1380 iwork = ie + m;
1381 /**
1382 * Perform bidiagonal QR iteration, computing left
1383 * singular vectors of L in WORK(IR) and computing
1384 * right singular vectors of L in WORK(IU)
1385 * (Workspace: need 2*M*M+5*M-4)
1386 **/
1387 dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1388 &work_1( iu ), ldwrku, &work_1( ir ),
1389 ldwrkr, dum, 1, &work_1( iwork ), info );
1390 /**
1391 * Multiply right singular vectors of L in WORK(IU) by
1392 * Q in VT, storing result in A
1393 * (Workspace: need M*M)
1394 **/
1395 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1396 one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda );
1397 /**
1398 * Copy right singular vectors of A from A to VT
1399 **/
1400 dlacpy( 'f', m, n, a, lda, vt, ldvt );
1401 /**
1402 * Copy left singular vectors of A from WORK(IR) to A
1403 **/
1404 dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a,
1405 lda );
1406
1407 } else {
1408 /**
1409 * Insufficient workspace for a fast algorithm
1410 **/
1411 itau = 1;
1412 iwork = itau + m;
1413 /**
1414 * Compute A=L*Q, copying result to VT
1415 * (Workspace: need 2*M, prefer M+M*NB)
1416 **/
1417 dgelqf( m, n, a, lda, &work_1( itau ),
1418 &work_1( iwork ), lwork-iwork+1, &ierr );
1419 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1420 /**
1421 * Generate Q in VT
1422 * (Workspace: need M+N, prefer M+N*NB)
1423 **/
1424 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1425 &work_1( iwork ), lwork-iwork+1, &ierr );
1426 ie = itau;
1427 itauq = ie + m;
1428 itaup = itauq + m;
1429 iwork = itaup + m;
1430 /**
1431 * Zero out above L in A
1432 **/
1433 dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
1434 lda );
1435 /**
1436 * Bidiagonalize L in A
1437 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
1438 **/
1439 dgebrd( m, m, a, lda, s, &work_1( ie ),
1440 &work_1( itauq ), &work_1( itaup ),
1441 &work_1( iwork ), lwork-iwork+1, &ierr );
1442 /**
1443 * Multiply right bidiagonalizing vectors in A by Q
1444 * in VT
1445 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
1446 **/
1447 dormbr( 'p', 'l', 't', m, n, m, a, lda,
1448 &work_1( itaup ), vt, ldvt,
1449 &work_1( iwork ), lwork-iwork+1, &ierr );
1450 /**
1451 * Generate left bidiagonalizing vectors in A
1452 * (Workspace: need 4*M, prefer 3*M+M*NB)
1453 **/
1454 dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
1455 &work_1( iwork ), lwork-iwork+1, &ierr );
1456 iwork = ie + m;
1457 /**
1458 * Perform bidiagonal QR iteration, computing left
1459 * singular vectors of A in A and computing right
1460 * singular vectors of A in VT
1461 * (Workspace: need 5*M-4)
1462 **/
1463 dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1464 ldvt, a, lda, dum, 1, &work_1( iwork ),
1465 info );
1466
1467 }
1468
1469 } else if( wntuas ) {
1470 /**
1471 * Path 9t(N much larger than M, JOBU='S' or 'A',
1472 * JOBVT='A')
1473 * N right singular vectors to be computed in VT and
1474 * M left singular vectors to be computed in U
1475 **/
1476 if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1477 /**
1478 * Sufficient workspace for a fast algorithm
1479 **/
1480 iu = 1;
1481 if( lwork>=wrkbl+lda*m ) {
1482 /**
1483 * WORK(IU) is LDA by M
1484 **/
1485 ldwrku = lda;
1486 } else {
1487 /**
1488 * WORK(IU) is M by M
1489 **/
1490 ldwrku = m;
1491 }
1492 itau = iu + ldwrku*m;
1493 iwork = itau + m;
1494 /**
1495 * Compute A=L*Q, copying result to VT
1496 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1497 **/
1498 dgelqf( m, n, a, lda, &work_1( itau ),
1499 &work_1( iwork ), lwork-iwork+1, &ierr );
1500 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1501 /**
1502 * Generate Q in VT
1503 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
1504 **/
1505 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1506 &work_1( iwork ), lwork-iwork+1, &ierr );
1507 /**
1508 * Copy L to WORK(IU), zeroing out above it
1509 **/
1510 dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1511 ldwrku );
1512 dlaset( 'u', m-1, m-1, zero, zero,
1513 &work_1( iu+ldwrku ), ldwrku );
1514 ie = itau;
1515 itauq = ie + m;
1516 itaup = itauq + m;
1517 iwork = itaup + m;
1518 /**
1519 * Bidiagonalize L in WORK(IU), copying result to U
1520 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1521 **/
1522 dgebrd( m, m, &work_1( iu ), ldwrku, s,
1523 &work_1( ie ), &work_1( itauq ),
1524 &work_1( itaup ), &work_1( iwork ),
1525 lwork-iwork+1, &ierr );
1526 dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u,
1527 ldu );
1528 /**
1529 * Generate right bidiagonalizing vectors in WORK(IU)
1530 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
1531 **/
1532 dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1533 &work_1( itaup ), &work_1( iwork ),
1534 lwork-iwork+1, &ierr );
1535 /**
1536 * Generate left bidiagonalizing vectors in U
1537 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
1538 **/
1539 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1540 &work_1( iwork ), lwork-iwork+1, &ierr );
1541 iwork = ie + m;
1542 /**
1543 * Perform bidiagonal QR iteration, computing left
1544 * singular vectors of L in U and computing right
1545 * singular vectors of L in WORK(IU)
1546 * (Workspace: need M*M+5*M-4)
1547 **/
1548 dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1549 &work_1( iu ), ldwrku, u, ldu, dum, 1,
1550 &work_1( iwork ), info );
1551 /**
1552 * Multiply right singular vectors of L in WORK(IU) by
1553 * Q in VT, storing result in A
1554 * (Workspace: need M*M)
1555 **/
1556 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1557 one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda );
1558 /**
1559 * Copy right singular vectors of A from A to VT
1560 **/
1561 dlacpy( 'f', m, n, a, lda, vt, ldvt );
1562
1563 } else {
1564 /**
1565 * Insufficient workspace for a fast algorithm
1566 **/
1567 itau = 1;
1568 iwork = itau + m;
1569 /**
1570 * Compute A=L*Q, copying result to VT
1571 * (Workspace: need 2*M, prefer M+M*NB)
1572 **/
1573 dgelqf( m, n, a, lda, &work_1( itau ),
1574 &work_1( iwork ), lwork-iwork+1, &ierr );
1575 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1576 /**
1577 * Generate Q in VT
1578 * (Workspace: need M+N, prefer M+N*NB)
1579 **/
1580 dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1581 &work_1( iwork ), lwork-iwork+1, &ierr );
1582 /**
1583 * Copy L to U, zeroing out above it
1584 **/
1585 dlacpy( 'l', m, m, a, lda, u, ldu );
1586 dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
1587 ldu );
1588 ie = itau;
1589 itauq = ie + m;
1590 itaup = itauq + m;
1591 iwork = itaup + m;
1592 /**
1593 * Bidiagonalize L in U
1594 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
1595 **/
1596 dgebrd( m, m, u, ldu, s, &work_1( ie ),
1597 &work_1( itauq ), &work_1( itaup ),
1598 &work_1( iwork ), lwork-iwork+1, &ierr );
1599 /**
1600 * Multiply right bidiagonalizing vectors in U by Q
1601 * in VT
1602 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
1603 **/
1604 dormbr( 'p', 'l', 't', m, n, m, u, ldu,
1605 &work_1( itaup ), vt, ldvt,
1606 &work_1( iwork ), lwork-iwork+1, &ierr );
1607 /**
1608 * Generate left bidiagonalizing vectors in U
1609 * (Workspace: need 4*M, prefer 3*M+M*NB)
1610 **/
1611 dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1612 &work_1( iwork ), lwork-iwork+1, &ierr );
1613 iwork = ie + m;
1614 /**
1615 * Perform bidiagonal QR iteration, computing left
1616 * singular vectors of A in U and computing right
1617 * singular vectors of A in VT
1618 * (Workspace: need 5*M-4)
1619 **/
1620 dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1621 ldvt, u, ldu, dum, 1, &work_1( iwork ),
1622 info );
1623
1624 }
1625
1626 }
1627
1628 }
1629
1630 } else {
1631 /**
1632 * N .LT. MNTHR
1633 *
1634 * Path 10t(N greater than M, but not much larger)
1635 * Reduce to bidiagonal form without LQ decomposition
1636 **/
1637 ie = 1;
1638 itauq = ie + m;
1639 itaup = itauq + m;
1640 iwork = itaup + m;
1641 /**
1642 * Bidiagonalize A
1643 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
1644 **/
1645 dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
1646 &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
1647 &ierr );
1648 if( wntuas ) {
1649 /**
1650 * If left singular vectors desired in U, copy result to U
1651 * and generate left bidiagonalizing vectors in U
1652 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
1653 **/
1654 dlacpy( 'l', m, m, a, lda, u, ldu );
1655 dorgbr( 'q', m, m, n, u, ldu, &work_1( itauq ),
1656 &work_1( iwork ), lwork-iwork+1, &ierr );
1657 }
1658 if( wntvas ) {
1659 /**
1660 * If right singular vectors desired in VT, copy result to
1661 * VT and generate right bidiagonalizing vectors in VT
1662 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
1663 **/
1664 dlacpy( 'u', m, n, a, lda, vt, ldvt );
1665 if( wntva )
1666 nrvt = n;
1667 if( wntvs )
1668 nrvt = m;
1669 dorgbr( 'p', nrvt, n, m, vt, ldvt, &work_1( itaup ),
1670 &work_1( iwork ), lwork-iwork+1, &ierr );
1671 }
1672 if( wntuo ) {
1673 /**
1674 * If left singular vectors desired in A, generate left
1675 * bidiagonalizing vectors in A
1676 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
1677 **/
1678 dorgbr( 'q', m, m, n, a, lda, &work_1( itauq ),
1679 &work_1( iwork ), lwork-iwork+1, &ierr );
1680 }
1681 if( wntvo ) {
1682 /**
1683 * If right singular vectors desired in A, generate right
1684 * bidiagonalizing vectors in A
1685 * (Workspace: need 4*M, prefer 3*M+M*NB)
1686 **/
1687 dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
1688 &work_1( iwork ), lwork-iwork+1, &ierr );
1689 }
1690 iwork = ie + m;
1691 if( wntuas || wntuo )
1692 nru = m;
1693 if( wntun )
1694 nru = 0;
1695 if( wntvas || wntvo )
1696 ncvt = n;
1697 if( wntvn )
1698 ncvt = 0;
1699 if( ( !wntuo ) && ( !wntvo ) ) {
1700 /**
1701 * Perform bidiagonal QR iteration, if desired, computing
1702 * left singular vectors in U and computing right singular
1703 * vectors in VT
1704 * (Workspace: need 5*M-4)
1705 **/
1706 dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt,
1707 ldvt, u, ldu, dum, 1, &work_1( iwork ), info );
1708 } else if( ( !wntuo ) && wntvo ) {
1709 /**
1710 * Perform bidiagonal QR iteration, if desired, computing
1711 * left singular vectors in U and computing right singular
1712 * vectors in A
1713 * (Workspace: need 5*M-4)
1714 **/
1715 dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), a, lda,
1716 u, ldu, dum, 1, &work_1( iwork ), info );
1717 } else {
1718 /**
1719 * Perform bidiagonal QR iteration, if desired, computing
1720 * left singular vectors in A and computing right singular
1721 * vectors in VT
1722 * (Workspace: need 5*M-4)
1723 **/
1724 dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt,
1725 ldvt, a, lda, dum, 1, &work_1( iwork ), info );
1726 }
1727
1728 }
1729
1730 /**
1731 * If DBDSQR failed to converge, copy unconverged superdiagonals
1732 * to WORK( 2:MINMN )
1733 **/
1734 if( *info!=0 ) {
1735 if( ie>2 ) {
1736 for (i=1 ; i<=minmn - 1 ; i+=1) {
1737 work_1( i+1 ) = work_1( i+ie-1 );
1738 }
1739 }
1740 if( ie<2 ) {
1741 for (i=minmn - 1 ; i>=1 ; i+=-1) {
1742 work_1( i+1 ) = work_1( i+ie-1 );
1743 }
1744 }
1745 }
1746 /**
1747 * Undo scaling if necessary
1748 **/
1749 if( iscl==1 ) {
1750 if( anrm>bignum )
1751 dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1752 &ierr );
1753 if( *info!=0 && anrm>bignum )
1754 dlascl( 'g', 0, 0, bignum, anrm, minmn-1, 1, &work_1( 2 ),
1755 minmn, &ierr );
1756 if( anrm<smlnum )
1757 dlascl( 'g', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1758 &ierr );
1759 if( *info!=0 && anrm<smlnum )
1760 dlascl( 'g', 0, 0, smlnum, anrm, minmn-1, 1, &work_1( 2 ),
1761 minmn, &ierr );
1762 }
1763 /**
1764 * Return optimal workspace in WORK(1)
1765 **/
1766 work_1( 1 ) = maxwrk;
1767
1768 return;
1769 /**
1770 * End of DGESVD
1771 **/
1772 }
1773