1 /*
2  * $Id: dgesv2.c,v 1.1 2005-09-18 22:04:46 dhmunro Exp $
3  * LAPACK routine to return the SVD of a matrix, m<n branch (see dgesvd.c).
4  */
5 /* Copyright (c) 2005, The Regents of the University of California.
6  * All rights reserved.
7  * This file is part of yorick (http://yorick.sourceforge.net).
8  * Read the accompanying LICENSE file for details.
9  */
10 
11 #include "dg.h"
12 
13 extern void dgesv2( char jobu, char jobvt, long m, long n,
14                    double a[], long lda, double s[], double u[], long ldu,
15                    double vt[], long ldvt,
16                    double work[], long lwork, long *info );
17 
18 /*---blas routines---*/
19 /* dgemm */
20 
21 
22 
23 /*---converted nutty string switches to single characters (lower case)---*/
24 #define lsame(x,y) ((x)==(y))
25 
26 
27 /*-----Fortran intrinsics converted-----*/
28 extern double sqrt(double);
29 #define min(x,y) ((x)<(y)?(x):(y))
30 #define max(x,y) ((x)>(y)?(x):(y))
31 /*-----end of Fortran intrinsics-----*/
32 
33 
34 
dgesv2(char jobu,char jobvt,long m,long n,double a[],long lda,double s[],double u[],long ldu,double vt[],long ldvt,double work[],long lwork,long * info)35 void dgesv2( char jobu, char jobvt, long m, long n,
36             double a[], long lda, double s[], double u[], long ldu,
37             double vt[], long ldvt,
38             double work[], long lwork, long *info )
39 {
40   /**
41    *  -- LAPACK driver routine (version 1.1) --
42    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
43    *     Courant Institute, Argonne National Lab, and Rice University
44    *     March 31, 1993
45    *
46    * This is m<n branch of original dgesvd routine (DHM).
47    *
48    *     .. Scalar Arguments ..*/
49   /**     ..
50    *     .. Array Arguments ..*/
51 #undef work_1
52 #define work_1(a1) work[a1-1]
53 #undef vt_2
54 #define vt_2(a1,a2) vt[a1-1+ldvt*(a2-1)]
55 #undef u_2
56 #define u_2(a1,a2) u[a1-1+ldu*(a2-1)]
57 #undef s_1
58 #define s_1(a1) s[a1-1]
59 #undef a_2
60 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
61   /*     .. Parameters ..*/
62 #undef zero
63 #define zero 0.0e0
64 #undef one
65 #define one 1.0e0
66   /**     ..
67    *     .. Local Scalars ..*/
68   int            wntua, wntuas, wntun, wntuo, wntus, wntva,
69   wntvas, wntvn, wntvo, wntvs;
70   long   blk, chunk, i, ie=0, ierr, ir, iscl, itau, itaup,
71          itauq, iu, iwork, ldwrkr, ldwrku, maxwrk=0,
72          minmn, minwrk, mnthr, ncvt=0, nru=0, nrvt=0,
73          wrkbl=0;
74   double    anrm, bignum, eps, smlnum;
75   char job_u_vt[3];
76   /**     ..
77    *     .. Local Arrays ..*/
78   double    dum[1];
79 #undef dum_1
80 #define dum_1(a1) [a1-1]
81   /**     ..
82    *     .. Intrinsic Functions ..*/
83   /*      intrinsic          max, min, sqrt;*/
84   /**     ..
85    *     .. Executable Statements ..
86    *
87    *     Test the input arguments
88    **/
89   /*-----implicit-declarations-----*/
90   /*-----end-of-declarations-----*/
91   *info = 0;
92   minmn = min( m, n );
93   job_u_vt[0]= jobu;
94   job_u_vt[1]= jobvt;
95   job_u_vt[2]= '\0';
96   mnthr = ilaenv( 6, "dgesvd", job_u_vt, m, n, 0, 0 );
97   wntua = lsame( jobu, 'a' );
98   wntus = lsame( jobu, 's' );
99   wntuas = wntua || wntus;
100   wntuo = lsame( jobu, 'o' );
101   wntun = lsame( jobu, 'n' );
102   wntva = lsame( jobvt, 'a' );
103   wntvs = lsame( jobvt, 's' );
104   wntvas = wntva || wntvs;
105   wntvo = lsame( jobvt, 'o' );
106   wntvn = lsame( jobvt, 'n' );
107   minwrk = 1;
108 
109   if( !( wntua || wntus || wntuo || wntun ) ) {
110     *info = -1;
111   } else if( !( wntva || wntvs || wntvo || wntvn ) ||
112             ( wntvo && wntuo ) ) {
113     *info = -2;
114   } else if( m<0 ) {
115     *info = -3;
116   } else if( n<0 ) {
117     *info = -4;
118   } else if( lda<max( 1, m ) ) {
119     *info = -6;
120   } else if( ldu<1 || ( wntuas && ldu<m ) ) {
121     *info = -9;
122   } else if( ldvt<1 || ( wntva && ldvt<n ) ||
123             ( wntvs && ldvt<minmn ) ) {
124     *info = -11;
125   }
126   /**
127    *     Compute workspace
128    *      (Note: Comments in the code beginning "Workspace:" describe the
129    *       minimal amount of workspace needed at that point in the code,
130    *       as well as the preferred amount for good performance.
131    *       NB refers to the optimal block size for the immediately
132    *       following subroutine, as returned by ILAENV.)
133    **/
134   if( *info==0 && lwork>=1 && m>0 && n>0 ) {
135     if( n>=mnthr ) {
136       if( wntvn ) {
137         /**
138          *                 Path 1t(N much larger than M, JOBVT='N')
139          **/
140         maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1,
141                               -1 );
142         maxwrk = max( maxwrk, 3*m+2*m*
143                      ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
144         if( wntuo || wntuas )
145           maxwrk = max( maxwrk, 3*m+m*
146                        ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
147         maxwrk = max( maxwrk, 5*m-4 );
148         minwrk = max( 4*m, 5*m-4 );
149       } else if( wntvo && wntun ) {
150         /**
151          *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
152          **/
153         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
154         wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
155                                        n, m, -1 ) );
156         wrkbl = max( wrkbl, 3*m+2*m*
157                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
158         wrkbl = max( wrkbl, 3*m+( m-1 )*
159                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
160         wrkbl = max( wrkbl, 5*m-4 );
161         maxwrk = max( m*m+wrkbl, m*m+m*n+m );
162         minwrk = max( 3*m+n, 5*m-4 );
163         minwrk = min( minwrk, maxwrk );
164       } else if( wntvo && wntuas ) {
165         /**
166          *                 Path 3t(N much larger than M, JOBU='S' or 'A',
167          *                 JOBVT='O')
168          **/
169         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
170         wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
171                                        n, m, -1 ) );
172         wrkbl = max( wrkbl, 3*m+2*m*
173                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
174         wrkbl = max( wrkbl, 3*m+( m-1 )*
175                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
176         wrkbl = max( wrkbl, 3*m+m*
177                     ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
178         wrkbl = max( wrkbl, 5*m-4 );
179         maxwrk = max( m*m+wrkbl, m*m+m*n+m );
180         minwrk = max( 3*m+n, 5*m-4 );
181         minwrk = min( minwrk, maxwrk );
182       } else if( wntvs && wntun ) {
183         /**
184          *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
185          **/
186         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
187         wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
188                                        n, m, -1 ) );
189         wrkbl = max( wrkbl, 3*m+2*m*
190                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
191         wrkbl = max( wrkbl, 3*m+( m-1 )*
192                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
193         wrkbl = max( wrkbl, 5*m-4 );
194         maxwrk = m*m + wrkbl;
195         minwrk = max( 3*m+n, 5*m-4 );
196         minwrk = min( minwrk, maxwrk );
197       } else if( wntvs && wntuo ) {
198         /**
199          *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
200          **/
201         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
202         wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
203                                        n, m, -1 ) );
204         wrkbl = max( wrkbl, 3*m+2*m*
205                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
206         wrkbl = max( wrkbl, 3*m+( m-1 )*
207                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
208         wrkbl = max( wrkbl, 3*m+m*
209                     ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
210         wrkbl = max( wrkbl, 5*m-4 );
211         maxwrk = 2*m*m + wrkbl;
212         minwrk = max( 3*m+n, 5*m-4 );
213         minwrk = min( minwrk, maxwrk );
214       } else if( wntvs && wntuas ) {
215         /**
216          *                 Path 6t(N much larger than M, JOBU='S' or 'A',
217          *                 JOBVT='S')
218          **/
219         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
220         wrkbl = max( wrkbl, m+m*ilaenv( 1, "dorglq", " ", m,
221                                        n, m, -1 ) );
222         wrkbl = max( wrkbl, 3*m+2*m*
223                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
224         wrkbl = max( wrkbl, 3*m+( m-1 )*
225                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
226         wrkbl = max( wrkbl, 3*m+m*
227                     ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
228         wrkbl = max( wrkbl, 5*m-4 );
229         maxwrk = m*m + wrkbl;
230         minwrk = max( 3*m+n, 5*m-4 );
231         minwrk = min( minwrk, maxwrk );
232       } else if( wntva && wntun ) {
233         /**
234          *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
235          **/
236         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
237         wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
238                                        n, m, -1 ) );
239         wrkbl = max( wrkbl, 3*m+2*m*
240                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
241         wrkbl = max( wrkbl, 3*m+( m-1 )*
242                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
243         wrkbl = max( wrkbl, 5*m-4 );
244         maxwrk = m*m + wrkbl;
245         minwrk = max( 3*m+n, 5*m-4 );
246         minwrk = min( minwrk, maxwrk );
247       } else if( wntva && wntuo ) {
248         /**
249          *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
250          **/
251         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
252         wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
253                                        n, m, -1 ) );
254         wrkbl = max( wrkbl, 3*m+2*m*
255                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
256         wrkbl = max( wrkbl, 3*m+( m-1 )*
257                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
258         wrkbl = max( wrkbl, 3*m+m*
259                     ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
260         wrkbl = max( wrkbl, 5*m-4 );
261         maxwrk = 2*m*m + wrkbl;
262         minwrk = max( 3*m+n, 5*m-4 );
263         minwrk = min( minwrk, maxwrk );
264       } else if( wntva && wntuas ) {
265         /**
266          *                 Path 9t(N much larger than M, JOBU='S' or 'A',
267          *                 JOBVT='A')
268          **/
269         wrkbl = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
270         wrkbl = max( wrkbl, m+n*ilaenv( 1, "dorglq", " ", n,
271                                        n, m, -1 ) );
272         wrkbl = max( wrkbl, 3*m+2*m*
273                     ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
274         wrkbl = max( wrkbl, 3*m+( m-1 )*
275                     ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
276         wrkbl = max( wrkbl, 3*m+m*
277                     ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
278         wrkbl = max( wrkbl, 5*m-4 );
279         maxwrk = m*m + wrkbl;
280         minwrk = max( 3*m+n, 5*m-4 );
281         minwrk = min( minwrk, maxwrk );
282       }
283     } else {
284       /**
285        *              Path 10t(N greater than M, but not much larger)
286        **/
287       maxwrk = 3*m + ( m+n )*ilaenv( 1, "dgebrd", " ", m, n,
288                                     -1, -1 );
289       if( wntvs || wntvo )
290         maxwrk = max( maxwrk, 3*m+m*
291                      ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) );
292       if( wntva )
293         maxwrk = max( maxwrk, 3*m+n*
294                      ilaenv( 1, "dorgbr", "p", n, n, m, -1 ) );
295       if( !wntun )
296         maxwrk = max( maxwrk, 3*m+( m-1 )*
297                      ilaenv( 1, "dorgbr", "q", m, m, m, -1 ) );
298       maxwrk = max( maxwrk, 5*m-4 );
299       minwrk = max( 3*m+n, 5*m-4 );
300     }
301     work_1( 1 ) = maxwrk;
302   }
303 
304   if( lwork<minwrk ) {
305     *info = -13;
306   }
307   if( *info!=0 ) {
308     xerbla( "dgesvd", -*info );
309     return;
310   }
311   /**
312    *     Quick return if possible
313    **/
314   if( m==0 || n==0 ) {
315     if( lwork>=1 )
316       work_1( 1 ) = one;
317     return;
318   }
319   /**
320    *     Get machine constants
321    **/
322   eps = dlamch( 'p' );
323   smlnum = sqrt( dlamch( 's' ) ) / eps;
324   bignum = one / smlnum;
325   /**
326    *     Scale A if max entry outside range [SMLNUM,BIGNUM]
327    **/
328   anrm = dlange( 'm', m, n, a, lda, dum );
329   iscl = 0;
330   if( anrm>zero && anrm<smlnum ) {
331     iscl = 1;
332     dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, &ierr );
333   } else if( anrm>bignum ) {
334     iscl = 1;
335     dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, &ierr );
336   }
337 
338   /**
339    *        A has more columns than rows. If A has sufficiently more
340    *        columns than rows, first reduce using the LQ decomposition (if
341    *        sufficient workspace available)
342    **/
343   if( n>=mnthr ) {
344 
345     if( wntvn ) {
346       /**
347        *              Path 1t(N much larger than M, JOBVT='N')
348        *              No right singular vectors to be computed
349        **/
350       itau = 1;
351       iwork = itau + m;
352       /**
353        *              Compute A=L*Q
354        *              (Workspace: need 2*M, prefer M+M*NB)
355        **/
356       dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
357              lwork-iwork+1, &ierr );
358       /**
359        *              Zero out above L
360        **/
361       dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ), lda );
362       ie = 1;
363       itauq = ie + m;
364       itaup = itauq + m;
365       iwork = itaup + m;
366       /**
367        *              Bidiagonalize L in A
368        *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
369        **/
370       dgebrd( m, m, a, lda, s, &work_1( ie ), &work_1( itauq ),
371              &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
372              &ierr );
373       if( wntuo || wntuas ) {
374         /**
375          *                 If left singular vectors desired, generate Q
376          *                 (Workspace: need 4*M, prefer 3*M+M*NB)
377          **/
378         dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
379                &work_1( iwork ), lwork-iwork+1, &ierr );
380       }
381       iwork = ie + m;
382       nru = 0;
383       if( wntuo || wntuas )
384         nru = m;
385       /**
386        *              Perform bidiagonal QR iteration, computing left singular
387        *              vectors of A in A if desired
388        *              (Workspace: need 5*M-4)
389        **/
390       dbdsqr( 'u', m, 0, nru, 0, s, &work_1( ie ), dum, 1, a,
391              lda, dum, 1, &work_1( iwork ), info );
392       /**
393        *              If left singular vectors desired in U, copy them there
394        **/
395       if( wntuas )
396         dlacpy( 'f', m, m, a, lda, u, ldu );
397 
398     } else if( wntvo && wntun ) {
399       /**
400        *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
401        *              M right singular vectors to be overwritten on A and
402        *              no left singular vectors to be computed
403        **/
404       if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
405         /**
406          *                 Sufficient workspace for a fast algorithm
407          **/
408         ir = 1;
409         if( lwork>=max( wrkbl, lda*n+m )+lda*m ) {
410           /**
411            *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
412            **/
413           ldwrku = lda;
414           chunk = n;
415           ldwrkr = lda;
416         } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) {
417           /**
418            *                    WORK(IU) is LDA by N and WORK(IR) is M by M
419            **/
420           ldwrku = lda;
421           chunk = n;
422           ldwrkr = m;
423         } else {
424           /**
425            *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
426            **/
427           ldwrku = m;
428           chunk = ( lwork-m*m-m ) / m;
429           ldwrkr = m;
430         }
431         itau = ir + ldwrkr*m;
432         iwork = itau + m;
433         /**
434          *                 Compute A=L*Q
435          *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
436          **/
437         dgelqf( m, n, a, lda, &work_1( itau ),
438                &work_1( iwork ), lwork-iwork+1, &ierr );
439         /**
440          *                 Copy L to WORK(IR) and zero out above it
441          **/
442         dlacpy( 'l', m, m, a, lda, &work_1( ir ), ldwrkr );
443         dlaset( 'u', m-1, m-1, zero, zero,
444                &work_1( ir+ldwrkr ), ldwrkr );
445         /**
446          *                 Generate Q in A
447          *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
448          **/
449         dorglq( m, n, m, a, lda, &work_1( itau ),
450                &work_1( iwork ), lwork-iwork+1, &ierr );
451         ie = itau;
452         itauq = ie + m;
453         itaup = itauq + m;
454         iwork = itaup + m;
455         /**
456          *                 Bidiagonalize L in WORK(IR)
457          *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
458          **/
459         dgebrd( m, m, &work_1( ir ), ldwrkr, s, &work_1( ie ),
460                &work_1( itauq ), &work_1( itaup ),
461                &work_1( iwork ), lwork-iwork+1, &ierr );
462         /**
463          *                 Generate right vectors bidiagonalizing L
464          *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
465          **/
466         dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
467                &work_1( itaup ), &work_1( iwork ),
468                lwork-iwork+1, &ierr );
469         iwork = ie + m;
470         /**
471          *                 Perform bidiagonal QR iteration, computing right
472          *                 singular vectors of L in WORK(IR)
473          *                 (Workspace: need M*M+5*M-4)
474          **/
475         dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
476                &work_1( ir ), ldwrkr, dum, 1, dum, 1,
477                &work_1( iwork ), info );
478         iu = ie + m;
479         /**
480          *                 Multiply right singular vectors of L in WORK(IR) by Q
481          *                 in A, storing result in WORK(IU) and copying to A
482          *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
483          **/
484         for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) {
485           blk = min( n-i+1, chunk );
486           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m,
487                       one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero,
488                       &work_1( iu ), ldwrku );
489           dlacpy( 'f', m, blk, &work_1( iu ), ldwrku,
490                  &a_2( 1, i ), lda );
491         }
492 
493       } else {
494         /**
495          *                 Insufficient workspace for a fast algorithm
496          **/
497         ie = 1;
498         itauq = ie + m;
499         itaup = itauq + m;
500         iwork = itaup + m;
501         /**
502          *                 Bidiagonalize A
503          *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
504          **/
505         dgebrd( m, n, a, lda, s, &work_1( ie ),
506                &work_1( itauq ), &work_1( itaup ),
507                &work_1( iwork ), lwork-iwork+1, &ierr );
508         /**
509          *                 Generate right vectors bidiagonalizing A
510          *                 (Workspace: need 4*M, prefer 3*M+M*NB)
511          **/
512         dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
513                &work_1( iwork ), lwork-iwork+1, &ierr );
514         iwork = ie + m;
515         /**
516          *                 Perform bidiagonal QR iteration, computing right
517          *                 singular vectors of A in A
518          *                 (Workspace: need 5*M-4)
519          **/
520         dbdsqr( 'l', m, n, 0, 0, s, &work_1( ie ), a, lda,
521                dum, 1, dum, 1, &work_1( iwork ), info );
522 
523       }
524 
525     } else if( wntvo && wntuas ) {
526       /**
527        *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
528        *              M right singular vectors to be overwritten on A and
529        *              M left singular vectors to be computed in U
530        **/
531       if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
532         /**
533          *                 Sufficient workspace for a fast algorithm
534          **/
535         ir = 1;
536         if( lwork>=max( wrkbl, lda*n+m )+lda*m ) {
537           /**
538            *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
539            **/
540           ldwrku = lda;
541           chunk = n;
542           ldwrkr = lda;
543         } else if( lwork>=max( wrkbl, lda*n+m )+m*m ) {
544           /**
545            *                    WORK(IU) is LDA by N and WORK(IR) is M by M
546            **/
547           ldwrku = lda;
548           chunk = n;
549           ldwrkr = m;
550         } else {
551           /**
552            *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
553            **/
554           ldwrku = m;
555           chunk = ( lwork-m*m-m ) / m;
556           ldwrkr = m;
557         }
558         itau = ir + ldwrkr*m;
559         iwork = itau + m;
560         /**
561          *                 Compute A=L*Q
562          *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
563          **/
564         dgelqf( m, n, a, lda, &work_1( itau ),
565                &work_1( iwork ), lwork-iwork+1, &ierr );
566         /**
567          *                 Copy L to U, zeroing about above it
568          **/
569         dlacpy( 'l', m, m, a, lda, u, ldu );
570         dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
571                ldu );
572         /**
573          *                 Generate Q in A
574          *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
575          **/
576         dorglq( m, n, m, a, lda, &work_1( itau ),
577                &work_1( iwork ), lwork-iwork+1, &ierr );
578         ie = itau;
579         itauq = ie + m;
580         itaup = itauq + m;
581         iwork = itaup + m;
582         /**
583          *                 Bidiagonalize L in U, copying result to WORK(IR)
584          *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
585          **/
586         dgebrd( m, m, u, ldu, s, &work_1( ie ),
587                &work_1( itauq ), &work_1( itaup ),
588                &work_1( iwork ), lwork-iwork+1, &ierr );
589         dlacpy( 'u', m, m, u, ldu, &work_1( ir ), ldwrkr );
590         /**
591          *                 Generate right vectors bidiagonalizing L in WORK(IR)
592          *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
593          **/
594         dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
595                &work_1( itaup ), &work_1( iwork ),
596                lwork-iwork+1, &ierr );
597         /**
598          *                 Generate left vectors bidiagonalizing L in U
599          *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
600          **/
601         dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
602                &work_1( iwork ), lwork-iwork+1, &ierr );
603         iwork = ie + m;
604         /**
605          *                 Perform bidiagonal QR iteration, computing left
606          *                 singular vectors of L in U, and computing right
607          *                 singular vectors of L in WORK(IR)
608          *                 (Workspace: need M*M+5*M-4)
609          **/
610         dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
611                &work_1( ir ), ldwrkr, u, ldu, dum, 1,
612                &work_1( iwork ), info );
613         iu = ie + m;
614         /**
615          *                 Multiply right singular vectors of L in WORK(IR) by Q
616          *                 in A, storing result in WORK(IU) and copying to A
617          *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
618          **/
619         for (i=1 ; chunk>0?i<=n:i>=n ; i+=chunk) {
620           blk = min( n-i+1, chunk );
621           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, blk, m,
622                       one, &work_1( ir ), ldwrkr, &a_2( 1, i ), lda, zero,
623                       &work_1( iu ), ldwrku );
624           dlacpy( 'f', m, blk, &work_1( iu ), ldwrku,
625                  &a_2( 1, i ), lda );
626         }
627 
628       } else {
629         /**
630          *                 Insufficient workspace for a fast algorithm
631          **/
632         itau = 1;
633         iwork = itau + m;
634         /**
635          *                 Compute A=L*Q
636          *                 (Workspace: need 2*M, prefer M+M*NB)
637          **/
638         dgelqf( m, n, a, lda, &work_1( itau ),
639                &work_1( iwork ), lwork-iwork+1, &ierr );
640         /**
641          *                 Copy L to U, zeroing out above it
642          **/
643         dlacpy( 'l', m, m, a, lda, u, ldu );
644         dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
645                ldu );
646         /**
647          *                 Generate Q in A
648          *                 (Workspace: need 2*M, prefer M+M*NB)
649          **/
650         dorglq( m, n, m, a, lda, &work_1( itau ),
651                &work_1( iwork ), lwork-iwork+1, &ierr );
652         ie = itau;
653         itauq = ie + m;
654         itaup = itauq + m;
655         iwork = itaup + m;
656         /**
657          *                 Bidiagonalize L in U
658          *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
659          **/
660         dgebrd( m, m, u, ldu, s, &work_1( ie ),
661                &work_1( itauq ), &work_1( itaup ),
662                &work_1( iwork ), lwork-iwork+1, &ierr );
663         /**
664          *                 Multiply right vectors bidiagonalizing L by Q in A
665          *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
666          **/
667         dormbr( 'p', 'l', 't', m, n, m, u, ldu,
668                &work_1( itaup ), a, lda, &work_1( iwork ),
669                lwork-iwork+1, &ierr );
670         /**
671          *                 Generate left vectors bidiagonalizing L in U
672          *                 (Workspace: need 4*M, prefer 3*M+M*NB)
673          **/
674         dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
675                &work_1( iwork ), lwork-iwork+1, &ierr );
676         iwork = ie + m;
677         /**
678          *                 Perform bidiagonal QR iteration, computing left
679          *                 singular vectors of A in U and computing right
680          *                 singular vectors of A in A
681          *                 (Workspace: need 5*M-4)
682          **/
683         dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), a, lda,
684                u, ldu, dum, 1, &work_1( iwork ), info );
685 
686       }
687 
688     } else if( wntvs ) {
689 
690       if( wntun ) {
691         /**
692          *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
693          *                 M right singular vectors to be computed in VT and
694          *                 no left singular vectors to be computed
695          **/
696         if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
697           /**
698            *                    Sufficient workspace for a fast algorithm
699            **/
700           ir = 1;
701           if( lwork>=wrkbl+lda*m ) {
702             /**
703              *                       WORK(IR) is LDA by M
704              **/
705             ldwrkr = lda;
706           } else {
707             /**
708              *                       WORK(IR) is M by M
709              **/
710             ldwrkr = m;
711           }
712           itau = ir + ldwrkr*m;
713           iwork = itau + m;
714           /**
715            *                    Compute A=L*Q
716            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
717            **/
718           dgelqf( m, n, a, lda, &work_1( itau ),
719                  &work_1( iwork ), lwork-iwork+1, &ierr );
720           /**
721            *                    Copy L to WORK(IR), zeroing out above it
722            **/
723           dlacpy( 'l', m, m, a, lda, &work_1( ir ),
724                  ldwrkr );
725           dlaset( 'u', m-1, m-1, zero, zero,
726                  &work_1( ir+ldwrkr ), ldwrkr );
727           /**
728            *                    Generate Q in A
729            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
730            **/
731           dorglq( m, n, m, a, lda, &work_1( itau ),
732                  &work_1( iwork ), lwork-iwork+1, &ierr );
733           ie = itau;
734           itauq = ie + m;
735           itaup = itauq + m;
736           iwork = itaup + m;
737           /**
738            *                    Bidiagonalize L in WORK(IR)
739            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
740            **/
741           dgebrd( m, m, &work_1( ir ), ldwrkr, s,
742                  &work_1( ie ), &work_1( itauq ),
743                  &work_1( itaup ), &work_1( iwork ),
744                  lwork-iwork+1, &ierr );
745           /**
746            *                    Generate right vectors bidiagonalizing L in
747            *                    WORK(IR)
748            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
749            **/
750           dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
751                  &work_1( itaup ), &work_1( iwork ),
752                  lwork-iwork+1, &ierr );
753           iwork = ie + m;
754           /**
755            *                    Perform bidiagonal QR iteration, computing right
756            *                    singular vectors of L in WORK(IR)
757            *                    (Workspace: need M*M+5*M-4)
758            **/
759           dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
760                  &work_1( ir ), ldwrkr, dum, 1, dum, 1,
761                  &work_1( iwork ), info );
762           /**
763            *                    Multiply right singular vectors of L in WORK(IR) by
764            *                    Q in A, storing result in VT
765            *                    (Workspace: need M*M)
766            **/
767           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
768                       one, &work_1( ir ), ldwrkr, a, lda, zero, vt, ldvt );
769 
770         } else {
771           /**
772            *                    Insufficient workspace for a fast algorithm
773            **/
774           itau = 1;
775           iwork = itau + m;
776           /**
777            *                    Compute A=L*Q
778            *                    (Workspace: need 2*M, prefer M+M*NB)
779            **/
780           dgelqf( m, n, a, lda, &work_1( itau ),
781                  &work_1( iwork ), lwork-iwork+1, &ierr );
782           /**
783            *                    Copy result to VT
784            **/
785           dlacpy( 'u', m, n, a, lda, vt, ldvt );
786           /**
787            *                    Generate Q in VT
788            *                    (Workspace: need 2*M, prefer M+M*NB)
789            **/
790           dorglq( m, n, m, vt, ldvt, &work_1( itau ),
791                  &work_1( iwork ), lwork-iwork+1, &ierr );
792           ie = itau;
793           itauq = ie + m;
794           itaup = itauq + m;
795           iwork = itaup + m;
796           /**
797            *                    Zero out above L in A
798            **/
799           dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
800                  lda );
801           /**
802            *                    Bidiagonalize L in A
803            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
804            **/
805           dgebrd( m, m, a, lda, s, &work_1( ie ),
806                  &work_1( itauq ), &work_1( itaup ),
807                  &work_1( iwork ), lwork-iwork+1, &ierr );
808           /**
809            *                    Multiply right vectors bidiagonalizing L by Q in VT
810            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
811            **/
812           dormbr( 'p', 'l', 't', m, n, m, a, lda,
813                  &work_1( itaup ), vt, ldvt,
814                  &work_1( iwork ), lwork-iwork+1, &ierr );
815           iwork = ie + m;
816           /**
817            *                    Perform bidiagonal QR iteration, computing right
818            *                    singular vectors of A in VT
819            *                    (Workspace: need 5*M-4)
820            **/
821           dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt,
822                  ldvt, dum, 1, dum, 1, &work_1( iwork ),
823                  info );
824 
825         }
826 
827       } else if( wntuo ) {
828         /**
829          *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
830          *                 M right singular vectors to be computed in VT and
831          *                 M left singular vectors to be overwritten on A
832          **/
833         if( lwork>=2*m*m+max( 4*m, 5*m-4 ) ) {
834           /**
835            *                    Sufficient workspace for a fast algorithm
836            **/
837           iu = 1;
838           if( lwork>=wrkbl+2*lda*m ) {
839             /**
840              *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
841              **/
842             ldwrku = lda;
843             ir = iu + ldwrku*m;
844             ldwrkr = lda;
845           } else if( lwork>=wrkbl+( lda+m )*m ) {
846             /**
847              *                       WORK(IU) is LDA by M and WORK(IR) is M by M
848              **/
849             ldwrku = lda;
850             ir = iu + ldwrku*m;
851             ldwrkr = m;
852           } else {
853             /**
854              *                       WORK(IU) is M by M and WORK(IR) is M by M
855              **/
856             ldwrku = m;
857             ir = iu + ldwrku*m;
858             ldwrkr = m;
859           }
860           itau = ir + ldwrkr*m;
861           iwork = itau + m;
862           /**
863            *                    Compute A=L*Q
864            *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
865            **/
866           dgelqf( m, n, a, lda, &work_1( itau ),
867                  &work_1( iwork ), lwork-iwork+1, &ierr );
868           /**
869            *                    Copy L to WORK(IU), zeroing out below it
870            **/
871           dlacpy( 'l', m, m, a, lda, &work_1( iu ),
872                  ldwrku );
873           dlaset( 'u', m-1, m-1, zero, zero,
874                  &work_1( iu+ldwrku ), ldwrku );
875           /**
876            *                    Generate Q in A
877            *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
878            **/
879           dorglq( m, n, m, a, lda, &work_1( itau ),
880                  &work_1( iwork ), lwork-iwork+1, &ierr );
881           ie = itau;
882           itauq = ie + m;
883           itaup = itauq + m;
884           iwork = itaup + m;
885           /**
886            *                    Bidiagonalize L in WORK(IU), copying result to
887            *                    WORK(IR)
888            *                    (Workspace: need 2*M*M+4*M,
889            *                                prefer 2*M*M+3*M+2*M*NB)
890            **/
891           dgebrd( m, m, &work_1( iu ), ldwrku, s,
892                  &work_1( ie ), &work_1( itauq ),
893                  &work_1( itaup ), &work_1( iwork ),
894                  lwork-iwork+1, &ierr );
895           dlacpy( 'l', m, m, &work_1( iu ), ldwrku,
896                  &work_1( ir ), ldwrkr );
897           /**
898            *                    Generate right bidiagonalizing vectors in WORK(IU)
899            *                    (Workspace: need 2*M*M+4*M-1,
900            *                                prefer 2*M*M+3*M+(M-1)*NB)
901            **/
902           dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
903                  &work_1( itaup ), &work_1( iwork ),
904                  lwork-iwork+1, &ierr );
905           /**
906            *                    Generate left bidiagonalizing vectors in WORK(IR)
907            *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
908            **/
909           dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr,
910                  &work_1( itauq ), &work_1( iwork ),
911                  lwork-iwork+1, &ierr );
912           iwork = ie + m;
913           /**
914            *                    Perform bidiagonal QR iteration, computing left
915            *                    singular vectors of L in WORK(IR) and computing
916            *                    right singular vectors of L in WORK(IU)
917            *                    (Workspace: need 2*M*M+5*M-4)
918            **/
919           dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
920                  &work_1( iu ), ldwrku, &work_1( ir ),
921                  ldwrkr, dum, 1, &work_1( iwork ), info );
922           /**
923            *                    Multiply right singular vectors of L in WORK(IU) by
924            *                    Q in A, storing result in VT
925            *                    (Workspace: need M*M)
926            **/
927           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
928                       one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt );
929           /**
930            *                    Copy left singular vectors of L to A
931            *                    (Workspace: need M*M)
932            **/
933           dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a,
934                  lda );
935 
936         } else {
937           /**
938            *                    Insufficient workspace for a fast algorithm
939            **/
940           itau = 1;
941           iwork = itau + m;
942           /**
943            *                    Compute A=L*Q, copying result to VT
944            *                    (Workspace: need 2*M, prefer M+M*NB)
945            **/
946           dgelqf( m, n, a, lda, &work_1( itau ),
947                  &work_1( iwork ), lwork-iwork+1, &ierr );
948           dlacpy( 'u', m, n, a, lda, vt, ldvt );
949           /**
950            *                    Generate Q in VT
951            *                    (Workspace: need 2*M, prefer M+M*NB)
952            **/
953           dorglq( m, n, m, vt, ldvt, &work_1( itau ),
954                  &work_1( iwork ), lwork-iwork+1, &ierr );
955           ie = itau;
956           itauq = ie + m;
957           itaup = itauq + m;
958           iwork = itaup + m;
959           /**
960            *                    Zero out above L in A
961            **/
962           dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
963                  lda );
964           /**
965            *                    Bidiagonalize L in A
966            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
967            **/
968           dgebrd( m, m, a, lda, s, &work_1( ie ),
969                  &work_1( itauq ), &work_1( itaup ),
970                  &work_1( iwork ), lwork-iwork+1, &ierr );
971           /**
972            *                    Multiply right vectors bidiagonalizing L by Q in VT
973            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
974            **/
975           dormbr( 'p', 'l', 't', m, n, m, a, lda,
976                  &work_1( itaup ), vt, ldvt,
977                  &work_1( iwork ), lwork-iwork+1, &ierr );
978           /**
979            *                    Generate left bidiagonalizing vectors of L in A
980            *                    (Workspace: need 4*M, prefer 3*M+M*NB)
981            **/
982           dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
983                  &work_1( iwork ), lwork-iwork+1, &ierr );
984           iwork = ie + m;
985           /**
986            *                    Perform bidiagonal QR iteration, compute left
987            *                    singular vectors of A in A and compute right
988            *                    singular vectors of A in VT
989            *                    (Workspace: need 5*M-4)
990            **/
991           dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
992                  ldvt, a, lda, dum, 1, &work_1( iwork ),
993                  info );
994 
995         }
996 
997       } else if( wntuas ) {
998         /**
999          *                 Path 6t(N much larger than M, JOBU='S' or 'A',
1000          *                         JOBVT='S')
1001          *                 M right singular vectors to be computed in VT and
1002          *                 M left singular vectors to be computed in U
1003          **/
1004         if( lwork>=m*m+max( 4*m, 5*m-4 ) ) {
1005           /**
1006            *                    Sufficient workspace for a fast algorithm
1007            **/
1008           iu = 1;
1009           if( lwork>=wrkbl+lda*m ) {
1010             /**
1011              *                       WORK(IU) is LDA by N
1012              **/
1013             ldwrku = lda;
1014           } else {
1015             /**
1016              *                       WORK(IU) is LDA by M
1017              **/
1018             ldwrku = m;
1019           }
1020           itau = iu + ldwrku*m;
1021           iwork = itau + m;
1022           /**
1023            *                    Compute A=L*Q
1024            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1025            **/
1026           dgelqf( m, n, a, lda, &work_1( itau ),
1027                  &work_1( iwork ), lwork-iwork+1, &ierr );
1028           /**
1029            *                    Copy L to WORK(IU), zeroing out above it
1030            **/
1031           dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1032                  ldwrku );
1033           dlaset( 'u', m-1, m-1, zero, zero,
1034                  &work_1( iu+ldwrku ), ldwrku );
1035           /**
1036            *                    Generate Q in A
1037            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1038            **/
1039           dorglq( m, n, m, a, lda, &work_1( itau ),
1040                  &work_1( iwork ), lwork-iwork+1, &ierr );
1041           ie = itau;
1042           itauq = ie + m;
1043           itaup = itauq + m;
1044           iwork = itaup + m;
1045           /**
1046            *                    Bidiagonalize L in WORK(IU), copying result to U
1047            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1048            **/
1049           dgebrd( m, m, &work_1( iu ), ldwrku, s,
1050                  &work_1( ie ), &work_1( itauq ),
1051                  &work_1( itaup ), &work_1( iwork ),
1052                  lwork-iwork+1, &ierr );
1053           dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u,
1054                  ldu );
1055           /**
1056            *                    Generate right bidiagonalizing vectors in WORK(IU)
1057            *                    (Workspace: need M*M+4*M-1,
1058            *                                prefer M*M+3*M+(M-1)*NB)
1059            **/
1060           dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1061                  &work_1( itaup ), &work_1( iwork ),
1062                  lwork-iwork+1, &ierr );
1063           /**
1064            *                    Generate left bidiagonalizing vectors in U
1065            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
1066            **/
1067           dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1068                  &work_1( iwork ), lwork-iwork+1, &ierr );
1069           iwork = ie + m;
1070           /**
1071            *                    Perform bidiagonal QR iteration, computing left
1072            *                    singular vectors of L in U and computing right
1073            *                    singular vectors of L in WORK(IU)
1074            *                    (Workspace: need M*M+5*M-4)
1075            **/
1076           dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1077                  &work_1( iu ), ldwrku, u, ldu, dum, 1,
1078                  &work_1( iwork ), info );
1079           /**
1080            *                    Multiply right singular vectors of L in WORK(IU) by
1081            *                    Q in A, storing result in VT
1082            *                    (Workspace: need M*M)
1083            **/
1084           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1085                       one, &work_1( iu ), ldwrku, a, lda, zero, vt, ldvt );
1086 
1087         } else {
1088           /**
1089            *                    Insufficient workspace for a fast algorithm
1090            **/
1091           itau = 1;
1092           iwork = itau + m;
1093           /**
1094            *                    Compute A=L*Q, copying result to VT
1095            *                    (Workspace: need 2*M, prefer M+M*NB)
1096            **/
1097           dgelqf( m, n, a, lda, &work_1( itau ),
1098                  &work_1( iwork ), lwork-iwork+1, &ierr );
1099           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1100           /**
1101            *                    Generate Q in VT
1102            *                    (Workspace: need 2*M, prefer M+M*NB)
1103            **/
1104           dorglq( m, n, m, vt, ldvt, &work_1( itau ),
1105                  &work_1( iwork ), lwork-iwork+1, &ierr );
1106           /**
1107            *                    Copy L to U, zeroing out above it
1108            **/
1109           dlacpy( 'l', m, m, a, lda, u, ldu );
1110           dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
1111                  ldu );
1112           ie = itau;
1113           itauq = ie + m;
1114           itaup = itauq + m;
1115           iwork = itaup + m;
1116           /**
1117            *                    Bidiagonalize L in U
1118            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
1119            **/
1120           dgebrd( m, m, u, ldu, s, &work_1( ie ),
1121                  &work_1( itauq ), &work_1( itaup ),
1122                  &work_1( iwork ), lwork-iwork+1, &ierr );
1123           /**
1124            *                    Multiply right bidiagonalizing vectors in U by Q
1125            *                    in VT
1126            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
1127            **/
1128           dormbr( 'p', 'l', 't', m, n, m, u, ldu,
1129                  &work_1( itaup ), vt, ldvt,
1130                  &work_1( iwork ), lwork-iwork+1, &ierr );
1131           /**
1132            *                    Generate left bidiagonalizing vectors in U
1133            *                    (Workspace: need 4*M, prefer 3*M+M*NB)
1134            **/
1135           dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1136                  &work_1( iwork ), lwork-iwork+1, &ierr );
1137           iwork = ie + m;
1138           /**
1139            *                    Perform bidiagonal QR iteration, computing left
1140            *                    singular vectors of A in U and computing right
1141            *                    singular vectors of A in VT
1142            *                    (Workspace: need 5*M-4)
1143            **/
1144           dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1145                  ldvt, u, ldu, dum, 1, &work_1( iwork ),
1146                  info );
1147 
1148         }
1149 
1150       }
1151 
1152     } else if( wntva ) {
1153 
1154       if( wntun ) {
1155         /**
1156          *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
1157          *                 N right singular vectors to be computed in VT and
1158          *                 no left singular vectors to be computed
1159          **/
1160         if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1161           /**
1162            *                    Sufficient workspace for a fast algorithm
1163            **/
1164           ir = 1;
1165           if( lwork>=wrkbl+lda*m ) {
1166             /**
1167              *                       WORK(IR) is LDA by M
1168              **/
1169             ldwrkr = lda;
1170           } else {
1171             /**
1172              *                       WORK(IR) is M by M
1173              **/
1174             ldwrkr = m;
1175           }
1176           itau = ir + ldwrkr*m;
1177           iwork = itau + m;
1178           /**
1179            *                    Compute A=L*Q, copying result to VT
1180            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1181            **/
1182           dgelqf( m, n, a, lda, &work_1( itau ),
1183                  &work_1( iwork ), lwork-iwork+1, &ierr );
1184           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1185           /**
1186            *                    Copy L to WORK(IR), zeroing out above it
1187            **/
1188           dlacpy( 'l', m, m, a, lda, &work_1( ir ),
1189                  ldwrkr );
1190           dlaset( 'u', m-1, m-1, zero, zero,
1191                  &work_1( ir+ldwrkr ), ldwrkr );
1192           /**
1193            *                    Generate Q in VT
1194            *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
1195            **/
1196           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1197                  &work_1( iwork ), lwork-iwork+1, &ierr );
1198           ie = itau;
1199           itauq = ie + m;
1200           itaup = itauq + m;
1201           iwork = itaup + m;
1202           /**
1203            *                    Bidiagonalize L in WORK(IR)
1204            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1205            **/
1206           dgebrd( m, m, &work_1( ir ), ldwrkr, s,
1207                  &work_1( ie ), &work_1( itauq ),
1208                  &work_1( itaup ), &work_1( iwork ),
1209                  lwork-iwork+1, &ierr );
1210           /**
1211            *                    Generate right bidiagonalizing vectors in WORK(IR)
1212            *                    (Workspace: need M*M+4*M-1,
1213            *                                prefer M*M+3*M+(M-1)*NB)
1214            **/
1215           dorgbr( 'p', m, m, m, &work_1( ir ), ldwrkr,
1216                  &work_1( itaup ), &work_1( iwork ),
1217                  lwork-iwork+1, &ierr );
1218           iwork = ie + m;
1219           /**
1220            *                    Perform bidiagonal QR iteration, computing right
1221            *                    singular vectors of L in WORK(IR)
1222            *                    (Workspace: need M*M+5*M-4)
1223            **/
1224           dbdsqr( 'u', m, m, 0, 0, s, &work_1( ie ),
1225                  &work_1( ir ), ldwrkr, dum, 1, dum, 1,
1226                  &work_1( iwork ), info );
1227           /**
1228            *                    Multiply right singular vectors of L in WORK(IR) by
1229            *                    Q in VT, storing result in A
1230            *                    (Workspace: need M*M)
1231            **/
1232           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1233                       one, &work_1( ir ), ldwrkr, vt, ldvt, zero, a, lda );
1234           /**
1235            *                    Copy right singular vectors of A from A to VT
1236            **/
1237           dlacpy( 'f', m, n, a, lda, vt, ldvt );
1238 
1239         } else {
1240           /**
1241            *                    Insufficient workspace for a fast algorithm
1242            **/
1243           itau = 1;
1244           iwork = itau + m;
1245           /**
1246            *                    Compute A=L*Q, copying result to VT
1247            *                    (Workspace: need 2*M, prefer M+M*NB)
1248            **/
1249           dgelqf( m, n, a, lda, &work_1( itau ),
1250                  &work_1( iwork ), lwork-iwork+1, &ierr );
1251           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1252           /**
1253            *                    Generate Q in VT
1254            *                    (Workspace: need M+N, prefer M+N*NB)
1255            **/
1256           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1257                  &work_1( iwork ), lwork-iwork+1, &ierr );
1258           ie = itau;
1259           itauq = ie + m;
1260           itaup = itauq + m;
1261           iwork = itaup + m;
1262           /**
1263            *                    Zero out above L in A
1264            **/
1265           dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
1266                  lda );
1267           /**
1268            *                    Bidiagonalize L in A
1269            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
1270            **/
1271           dgebrd( m, m, a, lda, s, &work_1( ie ),
1272                  &work_1( itauq ), &work_1( itaup ),
1273                  &work_1( iwork ), lwork-iwork+1, &ierr );
1274           /**
1275            *                    Multiply right bidiagonalizing vectors in A by Q
1276            *                    in VT
1277            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
1278            **/
1279           dormbr( 'p', 'l', 't', m, n, m, a, lda,
1280                  &work_1( itaup ), vt, ldvt,
1281                  &work_1( iwork ), lwork-iwork+1, &ierr );
1282           iwork = ie + m;
1283           /**
1284            *                    Perform bidiagonal QR iteration, computing right
1285            *                    singular vectors of A in VT
1286            *                    (Workspace: need 5*M-4)
1287            **/
1288           dbdsqr( 'u', m, n, 0, 0, s, &work_1( ie ), vt,
1289                  ldvt, dum, 1, dum, 1, &work_1( iwork ),
1290                  info );
1291 
1292         }
1293 
1294       } else if( wntuo ) {
1295         /**
1296          *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
1297          *                 N right singular vectors to be computed in VT and
1298          *                 M left singular vectors to be overwritten on A
1299          **/
1300         if( lwork>=2*m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1301           /**
1302            *                    Sufficient workspace for a fast algorithm
1303            **/
1304           iu = 1;
1305           if( lwork>=wrkbl+2*lda*m ) {
1306             /**
1307              *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
1308              **/
1309             ldwrku = lda;
1310             ir = iu + ldwrku*m;
1311             ldwrkr = lda;
1312           } else if( lwork>=wrkbl+( lda+m )*m ) {
1313             /**
1314              *                       WORK(IU) is LDA by M and WORK(IR) is M by M
1315              **/
1316             ldwrku = lda;
1317             ir = iu + ldwrku*m;
1318             ldwrkr = m;
1319           } else {
1320             /**
1321              *                       WORK(IU) is M by M and WORK(IR) is M by M
1322              **/
1323             ldwrku = m;
1324             ir = iu + ldwrku*m;
1325             ldwrkr = m;
1326           }
1327           itau = ir + ldwrkr*m;
1328           iwork = itau + m;
1329           /**
1330            *                    Compute A=L*Q, copying result to VT
1331            *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
1332            **/
1333           dgelqf( m, n, a, lda, &work_1( itau ),
1334                  &work_1( iwork ), lwork-iwork+1, &ierr );
1335           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1336           /**
1337            *                    Generate Q in VT
1338            *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
1339            **/
1340           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1341                  &work_1( iwork ), lwork-iwork+1, &ierr );
1342           /**
1343            *                    Copy L to WORK(IU), zeroing out above it
1344            **/
1345           dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1346                  ldwrku );
1347           dlaset( 'u', m-1, m-1, zero, zero,
1348                  &work_1( iu+ldwrku ), ldwrku );
1349           ie = itau;
1350           itauq = ie + m;
1351           itaup = itauq + m;
1352           iwork = itaup + m;
1353           /**
1354            *                    Bidiagonalize L in WORK(IU), copying result to
1355            *                    WORK(IR)
1356            *                    (Workspace: need 2*M*M+4*M,
1357            *                                prefer 2*M*M+3*M+2*M*NB)
1358            **/
1359           dgebrd( m, m, &work_1( iu ), ldwrku, s,
1360                  &work_1( ie ), &work_1( itauq ),
1361                  &work_1( itaup ), &work_1( iwork ),
1362                  lwork-iwork+1, &ierr );
1363           dlacpy( 'l', m, m, &work_1( iu ), ldwrku,
1364                  &work_1( ir ), ldwrkr );
1365           /**
1366            *                    Generate right bidiagonalizing vectors in WORK(IU)
1367            *                    (Workspace: need 2*M*M+4*M-1,
1368            *                                prefer 2*M*M+3*M+(M-1)*NB)
1369            **/
1370           dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1371                  &work_1( itaup ), &work_1( iwork ),
1372                  lwork-iwork+1, &ierr );
1373           /**
1374            *                    Generate left bidiagonalizing vectors in WORK(IR)
1375            *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
1376            **/
1377           dorgbr( 'q', m, m, m, &work_1( ir ), ldwrkr,
1378                  &work_1( itauq ), &work_1( iwork ),
1379                  lwork-iwork+1, &ierr );
1380           iwork = ie + m;
1381           /**
1382            *                    Perform bidiagonal QR iteration, computing left
1383            *                    singular vectors of L in WORK(IR) and computing
1384            *                    right singular vectors of L in WORK(IU)
1385            *                    (Workspace: need 2*M*M+5*M-4)
1386            **/
1387           dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1388                  &work_1( iu ), ldwrku, &work_1( ir ),
1389                  ldwrkr, dum, 1, &work_1( iwork ), info );
1390           /**
1391            *                    Multiply right singular vectors of L in WORK(IU) by
1392            *                    Q in VT, storing result in A
1393            *                    (Workspace: need M*M)
1394            **/
1395           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1396                       one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda );
1397           /**
1398            *                    Copy right singular vectors of A from A to VT
1399            **/
1400           dlacpy( 'f', m, n, a, lda, vt, ldvt );
1401           /**
1402            *                    Copy left singular vectors of A from WORK(IR) to A
1403            **/
1404           dlacpy( 'f', m, m, &work_1( ir ), ldwrkr, a,
1405                  lda );
1406 
1407         } else {
1408           /**
1409            *                    Insufficient workspace for a fast algorithm
1410            **/
1411           itau = 1;
1412           iwork = itau + m;
1413           /**
1414            *                    Compute A=L*Q, copying result to VT
1415            *                    (Workspace: need 2*M, prefer M+M*NB)
1416            **/
1417           dgelqf( m, n, a, lda, &work_1( itau ),
1418                  &work_1( iwork ), lwork-iwork+1, &ierr );
1419           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1420           /**
1421            *                    Generate Q in VT
1422            *                    (Workspace: need M+N, prefer M+N*NB)
1423            **/
1424           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1425                  &work_1( iwork ), lwork-iwork+1, &ierr );
1426           ie = itau;
1427           itauq = ie + m;
1428           itaup = itauq + m;
1429           iwork = itaup + m;
1430           /**
1431            *                    Zero out above L in A
1432            **/
1433           dlaset( 'u', m-1, m-1, zero, zero, &a_2( 1, 2 ),
1434                  lda );
1435           /**
1436            *                    Bidiagonalize L in A
1437            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
1438            **/
1439           dgebrd( m, m, a, lda, s, &work_1( ie ),
1440                  &work_1( itauq ), &work_1( itaup ),
1441                  &work_1( iwork ), lwork-iwork+1, &ierr );
1442           /**
1443            *                    Multiply right bidiagonalizing vectors in A by Q
1444            *                    in VT
1445            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
1446            **/
1447           dormbr( 'p', 'l', 't', m, n, m, a, lda,
1448                  &work_1( itaup ), vt, ldvt,
1449                  &work_1( iwork ), lwork-iwork+1, &ierr );
1450           /**
1451            *                    Generate left bidiagonalizing vectors in A
1452            *                    (Workspace: need 4*M, prefer 3*M+M*NB)
1453            **/
1454           dorgbr( 'q', m, m, m, a, lda, &work_1( itauq ),
1455                  &work_1( iwork ), lwork-iwork+1, &ierr );
1456           iwork = ie + m;
1457           /**
1458            *                    Perform bidiagonal QR iteration, computing left
1459            *                    singular vectors of A in A and computing right
1460            *                    singular vectors of A in VT
1461            *                    (Workspace: need 5*M-4)
1462            **/
1463           dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1464                  ldvt, a, lda, dum, 1, &work_1( iwork ),
1465                  info );
1466 
1467         }
1468 
1469       } else if( wntuas ) {
1470         /**
1471          *                 Path 9t(N much larger than M, JOBU='S' or 'A',
1472          *                         JOBVT='A')
1473          *                 N right singular vectors to be computed in VT and
1474          *                 M left singular vectors to be computed in U
1475          **/
1476         if( lwork>=m*m+max( n+m, max(4*m, 5*m-4) ) ) {
1477           /**
1478            *                    Sufficient workspace for a fast algorithm
1479            **/
1480           iu = 1;
1481           if( lwork>=wrkbl+lda*m ) {
1482             /**
1483              *                       WORK(IU) is LDA by M
1484              **/
1485             ldwrku = lda;
1486           } else {
1487             /**
1488              *                       WORK(IU) is M by M
1489              **/
1490             ldwrku = m;
1491           }
1492           itau = iu + ldwrku*m;
1493           iwork = itau + m;
1494           /**
1495            *                    Compute A=L*Q, copying result to VT
1496            *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
1497            **/
1498           dgelqf( m, n, a, lda, &work_1( itau ),
1499                  &work_1( iwork ), lwork-iwork+1, &ierr );
1500           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1501           /**
1502            *                    Generate Q in VT
1503            *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
1504            **/
1505           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1506                  &work_1( iwork ), lwork-iwork+1, &ierr );
1507           /**
1508            *                    Copy L to WORK(IU), zeroing out above it
1509            **/
1510           dlacpy( 'l', m, m, a, lda, &work_1( iu ),
1511                  ldwrku );
1512           dlaset( 'u', m-1, m-1, zero, zero,
1513                  &work_1( iu+ldwrku ), ldwrku );
1514           ie = itau;
1515           itauq = ie + m;
1516           itaup = itauq + m;
1517           iwork = itaup + m;
1518           /**
1519            *                    Bidiagonalize L in WORK(IU), copying result to U
1520            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
1521            **/
1522           dgebrd( m, m, &work_1( iu ), ldwrku, s,
1523                  &work_1( ie ), &work_1( itauq ),
1524                  &work_1( itaup ), &work_1( iwork ),
1525                  lwork-iwork+1, &ierr );
1526           dlacpy( 'l', m, m, &work_1( iu ), ldwrku, u,
1527                  ldu );
1528           /**
1529            *                    Generate right bidiagonalizing vectors in WORK(IU)
1530            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
1531            **/
1532           dorgbr( 'p', m, m, m, &work_1( iu ), ldwrku,
1533                  &work_1( itaup ), &work_1( iwork ),
1534                  lwork-iwork+1, &ierr );
1535           /**
1536            *                    Generate left bidiagonalizing vectors in U
1537            *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
1538            **/
1539           dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1540                  &work_1( iwork ), lwork-iwork+1, &ierr );
1541           iwork = ie + m;
1542           /**
1543            *                    Perform bidiagonal QR iteration, computing left
1544            *                    singular vectors of L in U and computing right
1545            *                    singular vectors of L in WORK(IU)
1546            *                    (Workspace: need M*M+5*M-4)
1547            **/
1548           dbdsqr( 'u', m, m, m, 0, s, &work_1( ie ),
1549                  &work_1( iu ), ldwrku, u, ldu, dum, 1,
1550                  &work_1( iwork ), info );
1551           /**
1552            *                    Multiply right singular vectors of L in WORK(IU) by
1553            *                    Q in VT, storing result in A
1554            *                    (Workspace: need M*M)
1555            **/
1556           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, m,
1557                       one, &work_1( iu ), ldwrku, vt, ldvt, zero, a, lda );
1558           /**
1559            *                    Copy right singular vectors of A from A to VT
1560            **/
1561           dlacpy( 'f', m, n, a, lda, vt, ldvt );
1562 
1563         } else {
1564           /**
1565            *                    Insufficient workspace for a fast algorithm
1566            **/
1567           itau = 1;
1568           iwork = itau + m;
1569           /**
1570            *                    Compute A=L*Q, copying result to VT
1571            *                    (Workspace: need 2*M, prefer M+M*NB)
1572            **/
1573           dgelqf( m, n, a, lda, &work_1( itau ),
1574                  &work_1( iwork ), lwork-iwork+1, &ierr );
1575           dlacpy( 'u', m, n, a, lda, vt, ldvt );
1576           /**
1577            *                    Generate Q in VT
1578            *                    (Workspace: need M+N, prefer M+N*NB)
1579            **/
1580           dorglq( n, n, m, vt, ldvt, &work_1( itau ),
1581                  &work_1( iwork ), lwork-iwork+1, &ierr );
1582           /**
1583            *                    Copy L to U, zeroing out above it
1584            **/
1585           dlacpy( 'l', m, m, a, lda, u, ldu );
1586           dlaset( 'u', m-1, m-1, zero, zero, &u_2( 1, 2 ),
1587                  ldu );
1588           ie = itau;
1589           itauq = ie + m;
1590           itaup = itauq + m;
1591           iwork = itaup + m;
1592           /**
1593            *                    Bidiagonalize L in U
1594            *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
1595            **/
1596           dgebrd( m, m, u, ldu, s, &work_1( ie ),
1597                  &work_1( itauq ), &work_1( itaup ),
1598                  &work_1( iwork ), lwork-iwork+1, &ierr );
1599           /**
1600            *                    Multiply right bidiagonalizing vectors in U by Q
1601            *                    in VT
1602            *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
1603            **/
1604           dormbr( 'p', 'l', 't', m, n, m, u, ldu,
1605                  &work_1( itaup ), vt, ldvt,
1606                  &work_1( iwork ), lwork-iwork+1, &ierr );
1607           /**
1608            *                    Generate left bidiagonalizing vectors in U
1609            *                    (Workspace: need 4*M, prefer 3*M+M*NB)
1610            **/
1611           dorgbr( 'q', m, m, m, u, ldu, &work_1( itauq ),
1612                  &work_1( iwork ), lwork-iwork+1, &ierr );
1613           iwork = ie + m;
1614           /**
1615            *                    Perform bidiagonal QR iteration, computing left
1616            *                    singular vectors of A in U and computing right
1617            *                    singular vectors of A in VT
1618            *                    (Workspace: need 5*M-4)
1619            **/
1620           dbdsqr( 'u', m, n, m, 0, s, &work_1( ie ), vt,
1621                  ldvt, u, ldu, dum, 1, &work_1( iwork ),
1622                  info );
1623 
1624         }
1625 
1626       }
1627 
1628     }
1629 
1630   } else {
1631     /**
1632      *           N .LT. MNTHR
1633      *
1634      *           Path 10t(N greater than M, but not much larger)
1635      *           Reduce to bidiagonal form without LQ decomposition
1636      **/
1637     ie = 1;
1638     itauq = ie + m;
1639     itaup = itauq + m;
1640     iwork = itaup + m;
1641     /**
1642      *           Bidiagonalize A
1643      *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
1644      **/
1645     dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
1646            &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
1647            &ierr );
1648     if( wntuas ) {
1649       /**
1650        *              If left singular vectors desired in U, copy result to U
1651        *              and generate left bidiagonalizing vectors in U
1652        *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
1653        **/
1654       dlacpy( 'l', m, m, a, lda, u, ldu );
1655       dorgbr( 'q', m, m, n, u, ldu, &work_1( itauq ),
1656              &work_1( iwork ), lwork-iwork+1, &ierr );
1657     }
1658     if( wntvas ) {
1659       /**
1660        *              If right singular vectors desired in VT, copy result to
1661        *              VT and generate right bidiagonalizing vectors in VT
1662        *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
1663        **/
1664       dlacpy( 'u', m, n, a, lda, vt, ldvt );
1665       if( wntva )
1666         nrvt = n;
1667       if( wntvs )
1668         nrvt = m;
1669       dorgbr( 'p', nrvt, n, m, vt, ldvt, &work_1( itaup ),
1670              &work_1( iwork ), lwork-iwork+1, &ierr );
1671     }
1672     if( wntuo ) {
1673       /**
1674        *              If left singular vectors desired in A, generate left
1675        *              bidiagonalizing vectors in A
1676        *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
1677        **/
1678       dorgbr( 'q', m, m, n, a, lda, &work_1( itauq ),
1679              &work_1( iwork ), lwork-iwork+1, &ierr );
1680     }
1681     if( wntvo ) {
1682       /**
1683        *              If right singular vectors desired in A, generate right
1684        *              bidiagonalizing vectors in A
1685        *              (Workspace: need 4*M, prefer 3*M+M*NB)
1686        **/
1687       dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
1688              &work_1( iwork ), lwork-iwork+1, &ierr );
1689     }
1690     iwork = ie + m;
1691     if( wntuas || wntuo )
1692       nru = m;
1693     if( wntun )
1694       nru = 0;
1695     if( wntvas || wntvo )
1696       ncvt = n;
1697     if( wntvn )
1698       ncvt = 0;
1699     if( ( !wntuo ) && ( !wntvo ) ) {
1700       /**
1701        *              Perform bidiagonal QR iteration, if desired, computing
1702        *              left singular vectors in U and computing right singular
1703        *              vectors in VT
1704        *              (Workspace: need 5*M-4)
1705        **/
1706       dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt,
1707              ldvt, u, ldu, dum, 1, &work_1( iwork ), info );
1708     } else if( ( !wntuo ) && wntvo ) {
1709       /**
1710        *              Perform bidiagonal QR iteration, if desired, computing
1711        *              left singular vectors in U and computing right singular
1712        *              vectors in A
1713        *              (Workspace: need 5*M-4)
1714        **/
1715       dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), a, lda,
1716              u, ldu, dum, 1, &work_1( iwork ), info );
1717     } else {
1718       /**
1719        *              Perform bidiagonal QR iteration, if desired, computing
1720        *              left singular vectors in A and computing right singular
1721        *              vectors in VT
1722        *              (Workspace: need 5*M-4)
1723        **/
1724       dbdsqr( 'l', m, ncvt, nru, 0, s, &work_1( ie ), vt,
1725              ldvt, a, lda, dum, 1, &work_1( iwork ), info );
1726     }
1727 
1728   }
1729 
1730   /**
1731    *     If DBDSQR failed to converge, copy unconverged superdiagonals
1732    *     to WORK( 2:MINMN )
1733    **/
1734   if( *info!=0 ) {
1735     if( ie>2 ) {
1736       for (i=1 ; i<=minmn - 1 ; i+=1) {
1737         work_1( i+1 ) = work_1( i+ie-1 );
1738       }
1739     }
1740     if( ie<2 ) {
1741       for (i=minmn - 1 ; i>=1 ; i+=-1) {
1742         work_1( i+1 ) = work_1( i+ie-1 );
1743       }
1744     }
1745   }
1746   /**
1747    *     Undo scaling if necessary
1748    **/
1749   if( iscl==1 ) {
1750     if( anrm>bignum )
1751       dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1752              &ierr );
1753     if( *info!=0 && anrm>bignum )
1754       dlascl( 'g', 0, 0, bignum, anrm, minmn-1, 1, &work_1( 2 ),
1755              minmn, &ierr );
1756     if( anrm<smlnum )
1757       dlascl( 'g', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1758              &ierr );
1759     if( *info!=0 && anrm<smlnum )
1760       dlascl( 'g', 0, 0, smlnum, anrm, minmn-1, 1, &work_1( 2 ),
1761              minmn, &ierr );
1762   }
1763   /**
1764    *     Return optimal workspace in WORK(1)
1765    **/
1766   work_1( 1 ) = maxwrk;
1767 
1768   return;
1769   /**
1770    *     End of DGESVD
1771    **/
1772 }
1773