1      SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2     $                   WORK, LWORK, INFO )
3*
4*  -- LAPACK driver routine (version 3.0) --
5*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6*     Courant Institute, Argonne National Lab, and Rice University
7*     October 31, 1999
8*
9*     .. Scalar Arguments ..
10      CHARACTER          JOBU, JOBVT
11      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
12*     ..
13*     .. Array Arguments ..
14      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
15     $                   VT( LDVT, * ), WORK( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  DGESVD computes the singular value decomposition (SVD) of a real
22*  M-by-N matrix A, optionally computing the left and/or right singular
23*  vectors. The SVD is written
24*
25*       A = U * SIGMA * transpose(V)
26*
27*  where SIGMA is an M-by-N matrix which is zero except for its
28*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
29*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
30*  are the singular values of A; they are real and non-negative, and
31*  are returned in descending order.  The first min(m,n) columns of
32*  U and V are the left and right singular vectors of A.
33*
34*  Note that the routine returns V**T, not V.
35*
36*  Arguments
37*  =========
38*
39*  JOBU    (input) CHARACTER*1
40*          Specifies options for computing all or part of the matrix U:
41*          = 'A':  all M columns of U are returned in array U:
42*          = 'S':  the first min(m,n) columns of U (the left singular
43*                  vectors) are returned in the array U;
44*          = 'O':  the first min(m,n) columns of U (the left singular
45*                  vectors) are overwritten on the array A;
46*          = 'N':  no columns of U (no left singular vectors) are
47*                  computed.
48*
49*  JOBVT   (input) CHARACTER*1
50*          Specifies options for computing all or part of the matrix
51*          V**T:
52*          = 'A':  all N rows of V**T are returned in the array VT;
53*          = 'S':  the first min(m,n) rows of V**T (the right singular
54*                  vectors) are returned in the array VT;
55*          = 'O':  the first min(m,n) rows of V**T (the right singular
56*                  vectors) are overwritten on the array A;
57*          = 'N':  no rows of V**T (no right singular vectors) are
58*                  computed.
59*
60*          JOBVT and JOBU cannot both be 'O'.
61*
62*  M       (input) INTEGER
63*          The number of rows of the input matrix A.  M >= 0.
64*
65*  N       (input) INTEGER
66*          The number of columns of the input matrix A.  N >= 0.
67*
68*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69*          On entry, the M-by-N matrix A.
70*          On exit,
71*          if JOBU = 'O',  A is overwritten with the first min(m,n)
72*                          columns of U (the left singular vectors,
73*                          stored columnwise);
74*          if JOBVT = 'O', A is overwritten with the first min(m,n)
75*                          rows of V**T (the right singular vectors,
76*                          stored rowwise);
77*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
78*                          are destroyed.
79*
80*  LDA     (input) INTEGER
81*          The leading dimension of the array A.  LDA >= max(1,M).
82*
83*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
84*          The singular values of A, sorted so that S(i) >= S(i+1).
85*
86*  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
87*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
88*          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
89*          if JOBU = 'S', U contains the first min(m,n) columns of U
90*          (the left singular vectors, stored columnwise);
91*          if JOBU = 'N' or 'O', U is not referenced.
92*
93*  LDU     (input) INTEGER
94*          The leading dimension of the array U.  LDU >= 1; if
95*          JOBU = 'S' or 'A', LDU >= M.
96*
97*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
98*          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
99*          V**T;
100*          if JOBVT = 'S', VT contains the first min(m,n) rows of
101*          V**T (the right singular vectors, stored rowwise);
102*          if JOBVT = 'N' or 'O', VT is not referenced.
103*
104*  LDVT    (input) INTEGER
105*          The leading dimension of the array VT.  LDVT >= 1; if
106*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
107*
108*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
109*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
110*          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
111*          superdiagonal elements of an upper bidiagonal matrix B
112*          whose diagonal is in S (not necessarily sorted). B
113*          satisfies A = U * B * VT, so it has the same singular values
114*          as A, and singular vectors related by U and VT.
115*
116*  LWORK   (input) INTEGER
117*          The dimension of the array WORK. LWORK >= 1.
118*          LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
119*          For good performance, LWORK should generally be larger.
120*
121*          If LWORK = -1, then a workspace query is assumed; the routine
122*          only calculates the optimal size of the WORK array, returns
123*          this value as the first entry of the WORK array, and no error
124*          message related to LWORK is issued by XERBLA.
125*
126*  INFO    (output) INTEGER
127*          = 0:  successful exit.
128*          < 0:  if INFO = -i, the i-th argument had an illegal value.
129*          > 0:  if DBDSQR did not converge, INFO specifies how many
130*                superdiagonals of an intermediate bidiagonal form B
131*                did not converge to zero. See the description of WORK
132*                above for details.
133*
134*  =====================================================================
135*
136*     .. Parameters ..
137      DOUBLE PRECISION   ZERO, ONE
138      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
139*     ..
140*     .. Local Scalars ..
141      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
142     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
143      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
144     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
145     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
146     $                   NRVT, WRKBL
147      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
148*     ..
149*     .. Local Arrays ..
150      DOUBLE PRECISION   DUM( 1 )
151*     ..
152*     .. External Subroutines ..
153      EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
154     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
155     $                   XERBLA
156*     ..
157*     .. External Functions ..
158      LOGICAL            LSAME
159      INTEGER            ILAENV
160      DOUBLE PRECISION   DLAMCH, DLANGE
161      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
162*     ..
163*     .. Intrinsic Functions ..
164      INTRINSIC          MAX, MIN, SQRT
165*     ..
166*     .. Executable Statements ..
167*
168*     Test the input arguments
169*
170      INFO = 0
171      MINMN = MIN( M, N )
172      MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
173      WNTUA = LSAME( JOBU, 'A' )
174      WNTUS = LSAME( JOBU, 'S' )
175      WNTUAS = WNTUA .OR. WNTUS
176      WNTUO = LSAME( JOBU, 'O' )
177      WNTUN = LSAME( JOBU, 'N' )
178      WNTVA = LSAME( JOBVT, 'A' )
179      WNTVS = LSAME( JOBVT, 'S' )
180      WNTVAS = WNTVA .OR. WNTVS
181      WNTVO = LSAME( JOBVT, 'O' )
182      WNTVN = LSAME( JOBVT, 'N' )
183      MINWRK = 1
184      LQUERY = ( LWORK.EQ.-1 )
185*
186      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
187         INFO = -1
188      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
189     $         ( WNTVO .AND. WNTUO ) ) THEN
190         INFO = -2
191      ELSE IF( M.LT.0 ) THEN
192         INFO = -3
193      ELSE IF( N.LT.0 ) THEN
194         INFO = -4
195      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
196         INFO = -6
197      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
198         INFO = -9
199      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
200     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
201         INFO = -11
202      END IF
203*
204*     Compute workspace
205*      (Note: Comments in the code beginning "Workspace:" describe the
206*       minimal amount of workspace needed at that point in the code,
207*       as well as the preferred amount for good performance.
208*       NB refers to the optimal block size for the immediately
209*       following subroutine, as returned by ILAENV.)
210*
211      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
212     $    N.GT.0 ) THEN
213         IF( M.GE.N ) THEN
214*
215*           Compute space needed for DBDSQR
216*
217            BDSPAC = 5*N
218            IF( M.GE.MNTHR ) THEN
219               IF( WNTUN ) THEN
220*
221*                 Path 1 (M much larger than N, JOBU='N')
222*
223                  MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
224     $                     -1 )
225                  MAXWRK = MAX( MAXWRK, 3*N+2*N*
226     $                     ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
227                  IF( WNTVO .OR. WNTVAS )
228     $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
229     $                        ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
230                  MAXWRK = MAX( MAXWRK, BDSPAC )
231                  MINWRK = MAX( 4*N, BDSPAC )
232                  MAXWRK = MAX( MAXWRK, MINWRK )
233               ELSE IF( WNTUO .AND. WNTVN ) THEN
234*
235*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
236*
237                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
238                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
239     $                    N, N, -1 ) )
240                  WRKBL = MAX( WRKBL, 3*N+2*N*
241     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
242                  WRKBL = MAX( WRKBL, 3*N+N*
243     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
244                  WRKBL = MAX( WRKBL, BDSPAC )
245                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
246                  MINWRK = MAX( 3*N+M, BDSPAC )
247                  MAXWRK = MAX( MAXWRK, MINWRK )
248               ELSE IF( WNTUO .AND. WNTVAS ) THEN
249*
250*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
251*                 'A')
252*
253                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
254                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
255     $                    N, N, -1 ) )
256                  WRKBL = MAX( WRKBL, 3*N+2*N*
257     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
258                  WRKBL = MAX( WRKBL, 3*N+N*
259     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
260                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
261     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
262                  WRKBL = MAX( WRKBL, BDSPAC )
263                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
264                  MINWRK = MAX( 3*N+M, BDSPAC )
265                  MAXWRK = MAX( MAXWRK, MINWRK )
266               ELSE IF( WNTUS .AND. WNTVN ) THEN
267*
268*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
269*
270                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
271                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
272     $                    N, N, -1 ) )
273                  WRKBL = MAX( WRKBL, 3*N+2*N*
274     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
275                  WRKBL = MAX( WRKBL, 3*N+N*
276     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
277                  WRKBL = MAX( WRKBL, BDSPAC )
278                  MAXWRK = N*N + WRKBL
279                  MINWRK = MAX( 3*N+M, BDSPAC )
280                  MAXWRK = MAX( MAXWRK, MINWRK )
281               ELSE IF( WNTUS .AND. WNTVO ) THEN
282*
283*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
284*
285                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
286                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
287     $                    N, N, -1 ) )
288                  WRKBL = MAX( WRKBL, 3*N+2*N*
289     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
290                  WRKBL = MAX( WRKBL, 3*N+N*
291     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
292                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
293     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
294                  WRKBL = MAX( WRKBL, BDSPAC )
295                  MAXWRK = 2*N*N + WRKBL
296                  MINWRK = MAX( 3*N+M, BDSPAC )
297                  MAXWRK = MAX( MAXWRK, MINWRK )
298               ELSE IF( WNTUS .AND. WNTVAS ) THEN
299*
300*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
301*                 'A')
302*
303                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
304                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
305     $                    N, N, -1 ) )
306                  WRKBL = MAX( WRKBL, 3*N+2*N*
307     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
308                  WRKBL = MAX( WRKBL, 3*N+N*
309     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
310                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
311     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
312                  WRKBL = MAX( WRKBL, BDSPAC )
313                  MAXWRK = N*N + WRKBL
314                  MINWRK = MAX( 3*N+M, BDSPAC )
315                  MAXWRK = MAX( MAXWRK, MINWRK )
316               ELSE IF( WNTUA .AND. WNTVN ) THEN
317*
318*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
319*
320                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
321                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
322     $                    M, N, -1 ) )
323                  WRKBL = MAX( WRKBL, 3*N+2*N*
324     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
325                  WRKBL = MAX( WRKBL, 3*N+N*
326     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
327                  WRKBL = MAX( WRKBL, BDSPAC )
328                  MAXWRK = N*N + WRKBL
329                  MINWRK = MAX( 3*N+M, BDSPAC )
330                  MAXWRK = MAX( MAXWRK, MINWRK )
331               ELSE IF( WNTUA .AND. WNTVO ) THEN
332*
333*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
334*
335                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
336                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
337     $                    M, N, -1 ) )
338                  WRKBL = MAX( WRKBL, 3*N+2*N*
339     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
340                  WRKBL = MAX( WRKBL, 3*N+N*
341     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
342                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
343     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
344                  WRKBL = MAX( WRKBL, BDSPAC )
345                  MAXWRK = 2*N*N + WRKBL
346                  MINWRK = MAX( 3*N+M, BDSPAC )
347                  MAXWRK = MAX( MAXWRK, MINWRK )
348               ELSE IF( WNTUA .AND. WNTVAS ) THEN
349*
350*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
351*                 'A')
352*
353                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
354                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
355     $                    M, N, -1 ) )
356                  WRKBL = MAX( WRKBL, 3*N+2*N*
357     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
358                  WRKBL = MAX( WRKBL, 3*N+N*
359     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
360                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
361     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
362                  WRKBL = MAX( WRKBL, BDSPAC )
363                  MAXWRK = N*N + WRKBL
364                  MINWRK = MAX( 3*N+M, BDSPAC )
365                  MAXWRK = MAX( MAXWRK, MINWRK )
366               END IF
367            ELSE
368*
369*              Path 10 (M at least N, but not much larger)
370*
371               MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
372     $                  -1, -1 )
373               IF( WNTUS .OR. WNTUO )
374     $            MAXWRK = MAX( MAXWRK, 3*N+N*
375     $                     ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
376               IF( WNTUA )
377     $            MAXWRK = MAX( MAXWRK, 3*N+M*
378     $                     ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
379               IF( .NOT.WNTVN )
380     $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
381     $                     ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
382               MAXWRK = MAX( MAXWRK, BDSPAC )
383               MINWRK = MAX( 3*N+M, BDSPAC )
384               MAXWRK = MAX( MAXWRK, MINWRK )
385            END IF
386         ELSE
387*
388*           Compute space needed for DBDSQR
389*
390            BDSPAC = 5*M
391            IF( N.GE.MNTHR ) THEN
392               IF( WNTVN ) THEN
393*
394*                 Path 1t(N much larger than M, JOBVT='N')
395*
396                  MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
397     $                     -1 )
398                  MAXWRK = MAX( MAXWRK, 3*M+2*M*
399     $                     ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
400                  IF( WNTUO .OR. WNTUAS )
401     $               MAXWRK = MAX( MAXWRK, 3*M+M*
402     $                        ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
403                  MAXWRK = MAX( MAXWRK, BDSPAC )
404                  MINWRK = MAX( 4*M, BDSPAC )
405                  MAXWRK = MAX( MAXWRK, MINWRK )
406               ELSE IF( WNTVO .AND. WNTUN ) THEN
407*
408*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
409*
410                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
411                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
412     $                    N, M, -1 ) )
413                  WRKBL = MAX( WRKBL, 3*M+2*M*
414     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
415                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
416     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
417                  WRKBL = MAX( WRKBL, BDSPAC )
418                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
419                  MINWRK = MAX( 3*M+N, BDSPAC )
420                  MAXWRK = MAX( MAXWRK, MINWRK )
421               ELSE IF( WNTVO .AND. WNTUAS ) THEN
422*
423*                 Path 3t(N much larger than M, JOBU='S' or 'A',
424*                 JOBVT='O')
425*
426                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
427                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
428     $                    N, M, -1 ) )
429                  WRKBL = MAX( WRKBL, 3*M+2*M*
430     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
431                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
432     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
433                  WRKBL = MAX( WRKBL, 3*M+M*
434     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
435                  WRKBL = MAX( WRKBL, BDSPAC )
436                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
437                  MINWRK = MAX( 3*M+N, BDSPAC )
438                  MAXWRK = MAX( MAXWRK, MINWRK )
439               ELSE IF( WNTVS .AND. WNTUN ) THEN
440*
441*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
442*
443                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
444                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
445     $                    N, M, -1 ) )
446                  WRKBL = MAX( WRKBL, 3*M+2*M*
447     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
448                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
449     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
450                  WRKBL = MAX( WRKBL, BDSPAC )
451                  MAXWRK = M*M + WRKBL
452                  MINWRK = MAX( 3*M+N, BDSPAC )
453                  MAXWRK = MAX( MAXWRK, MINWRK )
454               ELSE IF( WNTVS .AND. WNTUO ) THEN
455*
456*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
457*
458                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
459                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
460     $                    N, M, -1 ) )
461                  WRKBL = MAX( WRKBL, 3*M+2*M*
462     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
463                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
464     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
465                  WRKBL = MAX( WRKBL, 3*M+M*
466     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
467                  WRKBL = MAX( WRKBL, BDSPAC )
468                  MAXWRK = 2*M*M + WRKBL
469                  MINWRK = MAX( 3*M+N, BDSPAC )
470                  MAXWRK = MAX( MAXWRK, MINWRK )
471               ELSE IF( WNTVS .AND. WNTUAS ) THEN
472*
473*                 Path 6t(N much larger than M, JOBU='S' or 'A',
474*                 JOBVT='S')
475*
476                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
477                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
478     $                    N, M, -1 ) )
479                  WRKBL = MAX( WRKBL, 3*M+2*M*
480     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
481                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
482     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
483                  WRKBL = MAX( WRKBL, 3*M+M*
484     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
485                  WRKBL = MAX( WRKBL, BDSPAC )
486                  MAXWRK = M*M + WRKBL
487                  MINWRK = MAX( 3*M+N, BDSPAC )
488                  MAXWRK = MAX( MAXWRK, MINWRK )
489               ELSE IF( WNTVA .AND. WNTUN ) THEN
490*
491*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
492*
493                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
494                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
495     $                    N, M, -1 ) )
496                  WRKBL = MAX( WRKBL, 3*M+2*M*
497     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
498                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
499     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
500                  WRKBL = MAX( WRKBL, BDSPAC )
501                  MAXWRK = M*M + WRKBL
502                  MINWRK = MAX( 3*M+N, BDSPAC )
503                  MAXWRK = MAX( MAXWRK, MINWRK )
504               ELSE IF( WNTVA .AND. WNTUO ) THEN
505*
506*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
507*
508                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
509                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
510     $                    N, M, -1 ) )
511                  WRKBL = MAX( WRKBL, 3*M+2*M*
512     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
513                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
514     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
515                  WRKBL = MAX( WRKBL, 3*M+M*
516     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
517                  WRKBL = MAX( WRKBL, BDSPAC )
518                  MAXWRK = 2*M*M + WRKBL
519                  MINWRK = MAX( 3*M+N, BDSPAC )
520                  MAXWRK = MAX( MAXWRK, MINWRK )
521               ELSE IF( WNTVA .AND. WNTUAS ) THEN
522*
523*                 Path 9t(N much larger than M, JOBU='S' or 'A',
524*                 JOBVT='A')
525*
526                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
527                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
528     $                    N, M, -1 ) )
529                  WRKBL = MAX( WRKBL, 3*M+2*M*
530     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
531                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
532     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
533                  WRKBL = MAX( WRKBL, 3*M+M*
534     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
535                  WRKBL = MAX( WRKBL, BDSPAC )
536                  MAXWRK = M*M + WRKBL
537                  MINWRK = MAX( 3*M+N, BDSPAC )
538                  MAXWRK = MAX( MAXWRK, MINWRK )
539               END IF
540            ELSE
541*
542*              Path 10t(N greater than M, but not much larger)
543*
544               MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
545     $                  -1, -1 )
546               IF( WNTVS .OR. WNTVO )
547     $            MAXWRK = MAX( MAXWRK, 3*M+M*
548     $                     ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
549               IF( WNTVA )
550     $            MAXWRK = MAX( MAXWRK, 3*M+N*
551     $                     ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
552               IF( .NOT.WNTUN )
553     $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
554     $                     ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
555               MAXWRK = MAX( MAXWRK, BDSPAC )
556               MINWRK = MAX( 3*M+N, BDSPAC )
557               MAXWRK = MAX( MAXWRK, MINWRK )
558            END IF
559         END IF
560         WORK( 1 ) = MAXWRK
561      END IF
562*
563      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
564         INFO = -13
565      END IF
566      IF( INFO.NE.0 ) THEN
567         CALL XERBLA( 'DGESVD', -INFO )
568         RETURN
569      ELSE IF( LQUERY ) THEN
570         RETURN
571      END IF
572*
573*     Quick return if possible
574*
575      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
576         IF( LWORK.GE.1 )
577     $      WORK( 1 ) = ONE
578         RETURN
579      END IF
580*
581*     Get machine constants
582*
583      EPS = DLAMCH( 'P' )
584      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
585      BIGNUM = ONE / SMLNUM
586*
587*     Scale A if max element outside range [SMLNUM,BIGNUM]
588*
589      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
590      ISCL = 0
591      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
592         ISCL = 1
593         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
594      ELSE IF( ANRM.GT.BIGNUM ) THEN
595         ISCL = 1
596         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
597      END IF
598*
599      IF( M.GE.N ) THEN
600*
601*        A has at least as many rows as columns. If A has sufficiently
602*        more rows than columns, first reduce using the QR
603*        decomposition (if sufficient workspace available)
604*
605         IF( M.GE.MNTHR ) THEN
606*
607            IF( WNTUN ) THEN
608*
609*              Path 1 (M much larger than N, JOBU='N')
610*              No left singular vectors to be computed
611*
612               ITAU = 1
613               IWORK = ITAU + N
614*
615*              Compute A=Q*R
616*              (Workspace: need 2*N, prefer N+N*NB)
617*
618               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
619     $                      LWORK-IWORK+1, IERR )
620*
621*              Zero out below R
622*
623               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
624               IE = 1
625               ITAUQ = IE + N
626               ITAUP = ITAUQ + N
627               IWORK = ITAUP + N
628*
629*              Bidiagonalize R in A
630*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
631*
632               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
633     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
634     $                      IERR )
635               NCVT = 0
636               IF( WNTVO .OR. WNTVAS ) THEN
637*
638*                 If right singular vectors desired, generate P'.
639*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
640*
641                  CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
642     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
643                  NCVT = N
644               END IF
645               IWORK = IE + N
646*
647*              Perform bidiagonal QR iteration, computing right
648*              singular vectors of A in A if desired
649*              (Workspace: need BDSPAC)
650*
651               CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
652     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
653*
654*              If right singular vectors desired in VT, copy them there
655*
656               IF( WNTVAS )
657     $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
658*
659            ELSE IF( WNTUO .AND. WNTVN ) THEN
660*
661*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
662*              N left singular vectors to be overwritten on A and
663*              no right singular vectors to be computed
664*
665               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
666*
667*                 Sufficient workspace for a fast algorithm
668*
669                  IR = 1
670                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
671*
672*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
673*
674                     LDWRKU = LDA
675                     LDWRKR = LDA
676                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
677*
678*                    WORK(IU) is LDA by N, WORK(IR) is N by N
679*
680                     LDWRKU = LDA
681                     LDWRKR = N
682                  ELSE
683*
684*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
685*
686                     LDWRKU = ( LWORK-N*N-N ) / N
687                     LDWRKR = N
688                  END IF
689                  ITAU = IR + LDWRKR*N
690                  IWORK = ITAU + N
691*
692*                 Compute A=Q*R
693*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
694*
695                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
696     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
697*
698*                 Copy R to WORK(IR) and zero out below it
699*
700                  CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
701                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
702     $                         LDWRKR )
703*
704*                 Generate Q in A
705*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
706*
707                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
708     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
709                  IE = ITAU
710                  ITAUQ = IE + N
711                  ITAUP = ITAUQ + N
712                  IWORK = ITAUP + N
713*
714*                 Bidiagonalize R in WORK(IR)
715*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
716*
717                  CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
718     $                         WORK( ITAUQ ), WORK( ITAUP ),
719     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
720*
721*                 Generate left vectors bidiagonalizing R
722*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
723*
724                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
725     $                         WORK( ITAUQ ), WORK( IWORK ),
726     $                         LWORK-IWORK+1, IERR )
727                  IWORK = IE + N
728*
729*                 Perform bidiagonal QR iteration, computing left
730*                 singular vectors of R in WORK(IR)
731*                 (Workspace: need N*N+BDSPAC)
732*
733                  CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
734     $                         WORK( IR ), LDWRKR, DUM, 1,
735     $                         WORK( IWORK ), INFO )
736                  IU = IE + N
737*
738*                 Multiply Q in A by left singular vectors of R in
739*                 WORK(IR), storing result in WORK(IU) and copying to A
740*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
741*
742                  DO 10 I = 1, M, LDWRKU
743                     CHUNK = MIN( M-I+1, LDWRKU )
744                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
745     $                           LDA, WORK( IR ), LDWRKR, ZERO,
746     $                           WORK( IU ), LDWRKU )
747                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
748     $                            A( I, 1 ), LDA )
749   10             CONTINUE
750*
751               ELSE
752*
753*                 Insufficient workspace for a fast algorithm
754*
755                  IE = 1
756                  ITAUQ = IE + N
757                  ITAUP = ITAUQ + N
758                  IWORK = ITAUP + N
759*
760*                 Bidiagonalize A
761*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
762*
763                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
764     $                         WORK( ITAUQ ), WORK( ITAUP ),
765     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
766*
767*                 Generate left vectors bidiagonalizing A
768*                 (Workspace: need 4*N, prefer 3*N+N*NB)
769*
770                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
771     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
772                  IWORK = IE + N
773*
774*                 Perform bidiagonal QR iteration, computing left
775*                 singular vectors of A in A
776*                 (Workspace: need BDSPAC)
777*
778                  CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
779     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
780*
781               END IF
782*
783            ELSE IF( WNTUO .AND. WNTVAS ) THEN
784*
785*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
786*              N left singular vectors to be overwritten on A and
787*              N right singular vectors to be computed in VT
788*
789               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
790*
791*                 Sufficient workspace for a fast algorithm
792*
793                  IR = 1
794                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
795*
796*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
797*
798                     LDWRKU = LDA
799                     LDWRKR = LDA
800                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
801*
802*                    WORK(IU) is LDA by N and WORK(IR) is N by N
803*
804                     LDWRKU = LDA
805                     LDWRKR = N
806                  ELSE
807*
808*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
809*
810                     LDWRKU = ( LWORK-N*N-N ) / N
811                     LDWRKR = N
812                  END IF
813                  ITAU = IR + LDWRKR*N
814                  IWORK = ITAU + N
815*
816*                 Compute A=Q*R
817*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
818*
819                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
820     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
821*
822*                 Copy R to VT, zeroing out below it
823*
824                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
825                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
826     $                         LDVT )
827*
828*                 Generate Q in A
829*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
830*
831                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
832     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
833                  IE = ITAU
834                  ITAUQ = IE + N
835                  ITAUP = ITAUQ + N
836                  IWORK = ITAUP + N
837*
838*                 Bidiagonalize R in VT, copying result to WORK(IR)
839*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
840*
841                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
842     $                         WORK( ITAUQ ), WORK( ITAUP ),
843     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
844                  CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
845*
846*                 Generate left vectors bidiagonalizing R in WORK(IR)
847*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
848*
849                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
850     $                         WORK( ITAUQ ), WORK( IWORK ),
851     $                         LWORK-IWORK+1, IERR )
852*
853*                 Generate right vectors bidiagonalizing R in VT
854*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
855*
856                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
857     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
858                  IWORK = IE + N
859*
860*                 Perform bidiagonal QR iteration, computing left
861*                 singular vectors of R in WORK(IR) and computing right
862*                 singular vectors of R in VT
863*                 (Workspace: need N*N+BDSPAC)
864*
865                  CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
866     $                         WORK( IR ), LDWRKR, DUM, 1,
867     $                         WORK( IWORK ), INFO )
868                  IU = IE + N
869*
870*                 Multiply Q in A by left singular vectors of R in
871*                 WORK(IR), storing result in WORK(IU) and copying to A
872*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
873*
874                  DO 20 I = 1, M, LDWRKU
875                     CHUNK = MIN( M-I+1, LDWRKU )
876                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
877     $                           LDA, WORK( IR ), LDWRKR, ZERO,
878     $                           WORK( IU ), LDWRKU )
879                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
880     $                            A( I, 1 ), LDA )
881   20             CONTINUE
882*
883               ELSE
884*
885*                 Insufficient workspace for a fast algorithm
886*
887                  ITAU = 1
888                  IWORK = ITAU + N
889*
890*                 Compute A=Q*R
891*                 (Workspace: need 2*N, prefer N+N*NB)
892*
893                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
894     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
895*
896*                 Copy R to VT, zeroing out below it
897*
898                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
899                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
900     $                         LDVT )
901*
902*                 Generate Q in A
903*                 (Workspace: need 2*N, prefer N+N*NB)
904*
905                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
906     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
907                  IE = ITAU
908                  ITAUQ = IE + N
909                  ITAUP = ITAUQ + N
910                  IWORK = ITAUP + N
911*
912*                 Bidiagonalize R in VT
913*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
914*
915                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
916     $                         WORK( ITAUQ ), WORK( ITAUP ),
917     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
918*
919*                 Multiply Q in A by left vectors bidiagonalizing R
920*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
921*
922                  CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
923     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
924     $                         LWORK-IWORK+1, IERR )
925*
926*                 Generate right vectors bidiagonalizing R in VT
927*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
928*
929                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
930     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
931                  IWORK = IE + N
932*
933*                 Perform bidiagonal QR iteration, computing left
934*                 singular vectors of A in A and computing right
935*                 singular vectors of A in VT
936*                 (Workspace: need BDSPAC)
937*
938                  CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
939     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
940*
941               END IF
942*
943            ELSE IF( WNTUS ) THEN
944*
945               IF( WNTVN ) THEN
946*
947*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
948*                 N left singular vectors to be computed in U and
949*                 no right singular vectors to be computed
950*
951                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
952*
953*                    Sufficient workspace for a fast algorithm
954*
955                     IR = 1
956                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
957*
958*                       WORK(IR) is LDA by N
959*
960                        LDWRKR = LDA
961                     ELSE
962*
963*                       WORK(IR) is N by N
964*
965                        LDWRKR = N
966                     END IF
967                     ITAU = IR + LDWRKR*N
968                     IWORK = ITAU + N
969*
970*                    Compute A=Q*R
971*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
972*
973                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
974     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
975*
976*                    Copy R to WORK(IR), zeroing out below it
977*
978                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
979     $                            LDWRKR )
980                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
981     $                            WORK( IR+1 ), LDWRKR )
982*
983*                    Generate Q in A
984*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
985*
986                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
987     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
988                     IE = ITAU
989                     ITAUQ = IE + N
990                     ITAUP = ITAUQ + N
991                     IWORK = ITAUP + N
992*
993*                    Bidiagonalize R in WORK(IR)
994*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
995*
996                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
997     $                            WORK( IE ), WORK( ITAUQ ),
998     $                            WORK( ITAUP ), WORK( IWORK ),
999     $                            LWORK-IWORK+1, IERR )
1000*
1001*                    Generate left vectors bidiagonalizing R in WORK(IR)
1002*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1003*
1004                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1005     $                            WORK( ITAUQ ), WORK( IWORK ),
1006     $                            LWORK-IWORK+1, IERR )
1007                     IWORK = IE + N
1008*
1009*                    Perform bidiagonal QR iteration, computing left
1010*                    singular vectors of R in WORK(IR)
1011*                    (Workspace: need N*N+BDSPAC)
1012*
1013                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1014     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1015     $                            WORK( IWORK ), INFO )
1016*
1017*                    Multiply Q in A by left singular vectors of R in
1018*                    WORK(IR), storing result in U
1019*                    (Workspace: need N*N)
1020*
1021                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1022     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1023*
1024                  ELSE
1025*
1026*                    Insufficient workspace for a fast algorithm
1027*
1028                     ITAU = 1
1029                     IWORK = ITAU + N
1030*
1031*                    Compute A=Q*R, copying result to U
1032*                    (Workspace: need 2*N, prefer N+N*NB)
1033*
1034                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1035     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1036                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1037*
1038*                    Generate Q in U
1039*                    (Workspace: need 2*N, prefer N+N*NB)
1040*
1041                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1042     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1043                     IE = ITAU
1044                     ITAUQ = IE + N
1045                     ITAUP = ITAUQ + N
1046                     IWORK = ITAUP + N
1047*
1048*                    Zero out below R in A
1049*
1050                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1051     $                            LDA )
1052*
1053*                    Bidiagonalize R in A
1054*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1055*
1056                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1057     $                            WORK( ITAUQ ), WORK( ITAUP ),
1058     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1059*
1060*                    Multiply Q in U by left vectors bidiagonalizing R
1061*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1062*
1063                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1064     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1065     $                            LWORK-IWORK+1, IERR )
1066                     IWORK = IE + N
1067*
1068*                    Perform bidiagonal QR iteration, computing left
1069*                    singular vectors of A in U
1070*                    (Workspace: need BDSPAC)
1071*
1072                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1073     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1074     $                            INFO )
1075*
1076                  END IF
1077*
1078               ELSE IF( WNTVO ) THEN
1079*
1080*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1081*                 N left singular vectors to be computed in U and
1082*                 N right singular vectors to be overwritten on A
1083*
1084                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1085*
1086*                    Sufficient workspace for a fast algorithm
1087*
1088                     IU = 1
1089                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1090*
1091*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1092*
1093                        LDWRKU = LDA
1094                        IR = IU + LDWRKU*N
1095                        LDWRKR = LDA
1096                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1097*
1098*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1099*
1100                        LDWRKU = LDA
1101                        IR = IU + LDWRKU*N
1102                        LDWRKR = N
1103                     ELSE
1104*
1105*                       WORK(IU) is N by N and WORK(IR) is N by N
1106*
1107                        LDWRKU = N
1108                        IR = IU + LDWRKU*N
1109                        LDWRKR = N
1110                     END IF
1111                     ITAU = IR + LDWRKR*N
1112                     IWORK = ITAU + N
1113*
1114*                    Compute A=Q*R
1115*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1116*
1117                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1118     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1119*
1120*                    Copy R to WORK(IU), zeroing out below it
1121*
1122                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1123     $                            LDWRKU )
1124                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1125     $                            WORK( IU+1 ), LDWRKU )
1126*
1127*                    Generate Q in A
1128*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1129*
1130                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1131     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1132                     IE = ITAU
1133                     ITAUQ = IE + N
1134                     ITAUP = ITAUQ + N
1135                     IWORK = ITAUP + N
1136*
1137*                    Bidiagonalize R in WORK(IU), copying result to
1138*                    WORK(IR)
1139*                    (Workspace: need 2*N*N+4*N,
1140*                                prefer 2*N*N+3*N+2*N*NB)
1141*
1142                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1143     $                            WORK( IE ), WORK( ITAUQ ),
1144     $                            WORK( ITAUP ), WORK( IWORK ),
1145     $                            LWORK-IWORK+1, IERR )
1146                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1147     $                            WORK( IR ), LDWRKR )
1148*
1149*                    Generate left bidiagonalizing vectors in WORK(IU)
1150*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1151*
1152                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1153     $                            WORK( ITAUQ ), WORK( IWORK ),
1154     $                            LWORK-IWORK+1, IERR )
1155*
1156*                    Generate right bidiagonalizing vectors in WORK(IR)
1157*                    (Workspace: need 2*N*N+4*N-1,
1158*                                prefer 2*N*N+3*N+(N-1)*NB)
1159*
1160                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1161     $                            WORK( ITAUP ), WORK( IWORK ),
1162     $                            LWORK-IWORK+1, IERR )
1163                     IWORK = IE + N
1164*
1165*                    Perform bidiagonal QR iteration, computing left
1166*                    singular vectors of R in WORK(IU) and computing
1167*                    right singular vectors of R in WORK(IR)
1168*                    (Workspace: need 2*N*N+BDSPAC)
1169*
1170                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1171     $                            WORK( IR ), LDWRKR, WORK( IU ),
1172     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1173*
1174*                    Multiply Q in A by left singular vectors of R in
1175*                    WORK(IU), storing result in U
1176*                    (Workspace: need N*N)
1177*
1178                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1179     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1180*
1181*                    Copy right singular vectors of R to A
1182*                    (Workspace: need N*N)
1183*
1184                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1185     $                            LDA )
1186*
1187                  ELSE
1188*
1189*                    Insufficient workspace for a fast algorithm
1190*
1191                     ITAU = 1
1192                     IWORK = ITAU + N
1193*
1194*                    Compute A=Q*R, copying result to U
1195*                    (Workspace: need 2*N, prefer N+N*NB)
1196*
1197                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1198     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1199                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1200*
1201*                    Generate Q in U
1202*                    (Workspace: need 2*N, prefer N+N*NB)
1203*
1204                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1205     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1206                     IE = ITAU
1207                     ITAUQ = IE + N
1208                     ITAUP = ITAUQ + N
1209                     IWORK = ITAUP + N
1210*
1211*                    Zero out below R in A
1212*
1213                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1214     $                            LDA )
1215*
1216*                    Bidiagonalize R in A
1217*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1218*
1219                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1220     $                            WORK( ITAUQ ), WORK( ITAUP ),
1221     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1222*
1223*                    Multiply Q in U by left vectors bidiagonalizing R
1224*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1225*
1226                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1227     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1228     $                            LWORK-IWORK+1, IERR )
1229*
1230*                    Generate right vectors bidiagonalizing R in A
1231*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1232*
1233                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1234     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1235                     IWORK = IE + N
1236*
1237*                    Perform bidiagonal QR iteration, computing left
1238*                    singular vectors of A in U and computing right
1239*                    singular vectors of A in A
1240*                    (Workspace: need BDSPAC)
1241*
1242                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1243     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1244     $                            INFO )
1245*
1246                  END IF
1247*
1248               ELSE IF( WNTVAS ) THEN
1249*
1250*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1251*                         or 'A')
1252*                 N left singular vectors to be computed in U and
1253*                 N right singular vectors to be computed in VT
1254*
1255                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1256*
1257*                    Sufficient workspace for a fast algorithm
1258*
1259                     IU = 1
1260                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1261*
1262*                       WORK(IU) is LDA by N
1263*
1264                        LDWRKU = LDA
1265                     ELSE
1266*
1267*                       WORK(IU) is N by N
1268*
1269                        LDWRKU = N
1270                     END IF
1271                     ITAU = IU + LDWRKU*N
1272                     IWORK = ITAU + N
1273*
1274*                    Compute A=Q*R
1275*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1276*
1277                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1278     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1279*
1280*                    Copy R to WORK(IU), zeroing out below it
1281*
1282                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1283     $                            LDWRKU )
1284                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1285     $                            WORK( IU+1 ), LDWRKU )
1286*
1287*                    Generate Q in A
1288*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1289*
1290                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1291     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1292                     IE = ITAU
1293                     ITAUQ = IE + N
1294                     ITAUP = ITAUQ + N
1295                     IWORK = ITAUP + N
1296*
1297*                    Bidiagonalize R in WORK(IU), copying result to VT
1298*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1299*
1300                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1301     $                            WORK( IE ), WORK( ITAUQ ),
1302     $                            WORK( ITAUP ), WORK( IWORK ),
1303     $                            LWORK-IWORK+1, IERR )
1304                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1305     $                            LDVT )
1306*
1307*                    Generate left bidiagonalizing vectors in WORK(IU)
1308*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1309*
1310                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1311     $                            WORK( ITAUQ ), WORK( IWORK ),
1312     $                            LWORK-IWORK+1, IERR )
1313*
1314*                    Generate right bidiagonalizing vectors in VT
1315*                    (Workspace: need N*N+4*N-1,
1316*                                prefer N*N+3*N+(N-1)*NB)
1317*
1318                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1319     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1320                     IWORK = IE + N
1321*
1322*                    Perform bidiagonal QR iteration, computing left
1323*                    singular vectors of R in WORK(IU) and computing
1324*                    right singular vectors of R in VT
1325*                    (Workspace: need N*N+BDSPAC)
1326*
1327                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1328     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1329     $                            WORK( IWORK ), INFO )
1330*
1331*                    Multiply Q in A by left singular vectors of R in
1332*                    WORK(IU), storing result in U
1333*                    (Workspace: need N*N)
1334*
1335                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1336     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1337*
1338                  ELSE
1339*
1340*                    Insufficient workspace for a fast algorithm
1341*
1342                     ITAU = 1
1343                     IWORK = ITAU + N
1344*
1345*                    Compute A=Q*R, copying result to U
1346*                    (Workspace: need 2*N, prefer N+N*NB)
1347*
1348                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1349     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1350                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1351*
1352*                    Generate Q in U
1353*                    (Workspace: need 2*N, prefer N+N*NB)
1354*
1355                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1356     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1357*
1358*                    Copy R to VT, zeroing out below it
1359*
1360                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1361                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
1362     $                            LDVT )
1363                     IE = ITAU
1364                     ITAUQ = IE + N
1365                     ITAUP = ITAUQ + N
1366                     IWORK = ITAUP + N
1367*
1368*                    Bidiagonalize R in VT
1369*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1370*
1371                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1372     $                            WORK( ITAUQ ), WORK( ITAUP ),
1373     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1374*
1375*                    Multiply Q in U by left bidiagonalizing vectors
1376*                    in VT
1377*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1378*
1379                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1380     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1381     $                            LWORK-IWORK+1, IERR )
1382*
1383*                    Generate right bidiagonalizing vectors in VT
1384*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1385*
1386                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1387     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1388                     IWORK = IE + N
1389*
1390*                    Perform bidiagonal QR iteration, computing left
1391*                    singular vectors of A in U and computing right
1392*                    singular vectors of A in VT
1393*                    (Workspace: need BDSPAC)
1394*
1395                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1396     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1397     $                            INFO )
1398*
1399                  END IF
1400*
1401               END IF
1402*
1403            ELSE IF( WNTUA ) THEN
1404*
1405               IF( WNTVN ) THEN
1406*
1407*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1408*                 M left singular vectors to be computed in U and
1409*                 no right singular vectors to be computed
1410*
1411                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1412*
1413*                    Sufficient workspace for a fast algorithm
1414*
1415                     IR = 1
1416                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1417*
1418*                       WORK(IR) is LDA by N
1419*
1420                        LDWRKR = LDA
1421                     ELSE
1422*
1423*                       WORK(IR) is N by N
1424*
1425                        LDWRKR = N
1426                     END IF
1427                     ITAU = IR + LDWRKR*N
1428                     IWORK = ITAU + N
1429*
1430*                    Compute A=Q*R, copying result to U
1431*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1432*
1433                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1434     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1435                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1436*
1437*                    Copy R to WORK(IR), zeroing out below it
1438*
1439                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1440     $                            LDWRKR )
1441                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1442     $                            WORK( IR+1 ), LDWRKR )
1443*
1444*                    Generate Q in U
1445*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1446*
1447                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1448     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1449                     IE = ITAU
1450                     ITAUQ = IE + N
1451                     ITAUP = ITAUQ + N
1452                     IWORK = ITAUP + N
1453*
1454*                    Bidiagonalize R in WORK(IR)
1455*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1456*
1457                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1458     $                            WORK( IE ), WORK( ITAUQ ),
1459     $                            WORK( ITAUP ), WORK( IWORK ),
1460     $                            LWORK-IWORK+1, IERR )
1461*
1462*                    Generate left bidiagonalizing vectors in WORK(IR)
1463*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1464*
1465                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1466     $                            WORK( ITAUQ ), WORK( IWORK ),
1467     $                            LWORK-IWORK+1, IERR )
1468                     IWORK = IE + N
1469*
1470*                    Perform bidiagonal QR iteration, computing left
1471*                    singular vectors of R in WORK(IR)
1472*                    (Workspace: need N*N+BDSPAC)
1473*
1474                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1475     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1476     $                            WORK( IWORK ), INFO )
1477*
1478*                    Multiply Q in U by left singular vectors of R in
1479*                    WORK(IR), storing result in A
1480*                    (Workspace: need N*N)
1481*
1482                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1483     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1484*
1485*                    Copy left singular vectors of A from A to U
1486*
1487                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1488*
1489                  ELSE
1490*
1491*                    Insufficient workspace for a fast algorithm
1492*
1493                     ITAU = 1
1494                     IWORK = ITAU + N
1495*
1496*                    Compute A=Q*R, copying result to U
1497*                    (Workspace: need 2*N, prefer N+N*NB)
1498*
1499                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1500     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1501                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1502*
1503*                    Generate Q in U
1504*                    (Workspace: need N+M, prefer N+M*NB)
1505*
1506                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1507     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1508                     IE = ITAU
1509                     ITAUQ = IE + N
1510                     ITAUP = ITAUQ + N
1511                     IWORK = ITAUP + N
1512*
1513*                    Zero out below R in A
1514*
1515                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1516     $                            LDA )
1517*
1518*                    Bidiagonalize R in A
1519*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1520*
1521                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1522     $                            WORK( ITAUQ ), WORK( ITAUP ),
1523     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1524*
1525*                    Multiply Q in U by left bidiagonalizing vectors
1526*                    in A
1527*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1528*
1529                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1530     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1531     $                            LWORK-IWORK+1, IERR )
1532                     IWORK = IE + N
1533*
1534*                    Perform bidiagonal QR iteration, computing left
1535*                    singular vectors of A in U
1536*                    (Workspace: need BDSPAC)
1537*
1538                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1539     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1540     $                            INFO )
1541*
1542                  END IF
1543*
1544               ELSE IF( WNTVO ) THEN
1545*
1546*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1547*                 M left singular vectors to be computed in U and
1548*                 N right singular vectors to be overwritten on A
1549*
1550                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1551*
1552*                    Sufficient workspace for a fast algorithm
1553*
1554                     IU = 1
1555                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1556*
1557*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1558*
1559                        LDWRKU = LDA
1560                        IR = IU + LDWRKU*N
1561                        LDWRKR = LDA
1562                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1563*
1564*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1565*
1566                        LDWRKU = LDA
1567                        IR = IU + LDWRKU*N
1568                        LDWRKR = N
1569                     ELSE
1570*
1571*                       WORK(IU) is N by N and WORK(IR) is N by N
1572*
1573                        LDWRKU = N
1574                        IR = IU + LDWRKU*N
1575                        LDWRKR = N
1576                     END IF
1577                     ITAU = IR + LDWRKR*N
1578                     IWORK = ITAU + N
1579*
1580*                    Compute A=Q*R, copying result to U
1581*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1582*
1583                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1584     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1585                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1586*
1587*                    Generate Q in U
1588*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1589*
1590                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1591     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1592*
1593*                    Copy R to WORK(IU), zeroing out below it
1594*
1595                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1596     $                            LDWRKU )
1597                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1598     $                            WORK( IU+1 ), LDWRKU )
1599                     IE = ITAU
1600                     ITAUQ = IE + N
1601                     ITAUP = ITAUQ + N
1602                     IWORK = ITAUP + N
1603*
1604*                    Bidiagonalize R in WORK(IU), copying result to
1605*                    WORK(IR)
1606*                    (Workspace: need 2*N*N+4*N,
1607*                                prefer 2*N*N+3*N+2*N*NB)
1608*
1609                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1610     $                            WORK( IE ), WORK( ITAUQ ),
1611     $                            WORK( ITAUP ), WORK( IWORK ),
1612     $                            LWORK-IWORK+1, IERR )
1613                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1614     $                            WORK( IR ), LDWRKR )
1615*
1616*                    Generate left bidiagonalizing vectors in WORK(IU)
1617*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1618*
1619                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1620     $                            WORK( ITAUQ ), WORK( IWORK ),
1621     $                            LWORK-IWORK+1, IERR )
1622*
1623*                    Generate right bidiagonalizing vectors in WORK(IR)
1624*                    (Workspace: need 2*N*N+4*N-1,
1625*                                prefer 2*N*N+3*N+(N-1)*NB)
1626*
1627                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1628     $                            WORK( ITAUP ), WORK( IWORK ),
1629     $                            LWORK-IWORK+1, IERR )
1630                     IWORK = IE + N
1631*
1632*                    Perform bidiagonal QR iteration, computing left
1633*                    singular vectors of R in WORK(IU) and computing
1634*                    right singular vectors of R in WORK(IR)
1635*                    (Workspace: need 2*N*N+BDSPAC)
1636*
1637                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1638     $                            WORK( IR ), LDWRKR, WORK( IU ),
1639     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1640*
1641*                    Multiply Q in U by left singular vectors of R in
1642*                    WORK(IU), storing result in A
1643*                    (Workspace: need N*N)
1644*
1645                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1646     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1647*
1648*                    Copy left singular vectors of A from A to U
1649*
1650                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1651*
1652*                    Copy right singular vectors of R from WORK(IR) to A
1653*
1654                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1655     $                            LDA )
1656*
1657                  ELSE
1658*
1659*                    Insufficient workspace for a fast algorithm
1660*
1661                     ITAU = 1
1662                     IWORK = ITAU + N
1663*
1664*                    Compute A=Q*R, copying result to U
1665*                    (Workspace: need 2*N, prefer N+N*NB)
1666*
1667                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1668     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1669                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1670*
1671*                    Generate Q in U
1672*                    (Workspace: need N+M, prefer N+M*NB)
1673*
1674                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1675     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1676                     IE = ITAU
1677                     ITAUQ = IE + N
1678                     ITAUP = ITAUQ + N
1679                     IWORK = ITAUP + N
1680*
1681*                    Zero out below R in A
1682*
1683                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1684     $                            LDA )
1685*
1686*                    Bidiagonalize R in A
1687*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1688*
1689                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1690     $                            WORK( ITAUQ ), WORK( ITAUP ),
1691     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1692*
1693*                    Multiply Q in U by left bidiagonalizing vectors
1694*                    in A
1695*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1696*
1697                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1698     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1699     $                            LWORK-IWORK+1, IERR )
1700*
1701*                    Generate right bidiagonalizing vectors in A
1702*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1703*
1704                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1705     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1706                     IWORK = IE + N
1707*
1708*                    Perform bidiagonal QR iteration, computing left
1709*                    singular vectors of A in U and computing right
1710*                    singular vectors of A in A
1711*                    (Workspace: need BDSPAC)
1712*
1713                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1714     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1715     $                            INFO )
1716*
1717                  END IF
1718*
1719               ELSE IF( WNTVAS ) THEN
1720*
1721*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1722*                         or 'A')
1723*                 M left singular vectors to be computed in U and
1724*                 N right singular vectors to be computed in VT
1725*
1726                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1727*
1728*                    Sufficient workspace for a fast algorithm
1729*
1730                     IU = 1
1731                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1732*
1733*                       WORK(IU) is LDA by N
1734*
1735                        LDWRKU = LDA
1736                     ELSE
1737*
1738*                       WORK(IU) is N by N
1739*
1740                        LDWRKU = N
1741                     END IF
1742                     ITAU = IU + LDWRKU*N
1743                     IWORK = ITAU + N
1744*
1745*                    Compute A=Q*R, copying result to U
1746*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1747*
1748                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1749     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1750                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1751*
1752*                    Generate Q in U
1753*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1754*
1755                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1756     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1757*
1758*                    Copy R to WORK(IU), zeroing out below it
1759*
1760                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1761     $                            LDWRKU )
1762                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1763     $                            WORK( IU+1 ), LDWRKU )
1764                     IE = ITAU
1765                     ITAUQ = IE + N
1766                     ITAUP = ITAUQ + N
1767                     IWORK = ITAUP + N
1768*
1769*                    Bidiagonalize R in WORK(IU), copying result to VT
1770*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1771*
1772                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1773     $                            WORK( IE ), WORK( ITAUQ ),
1774     $                            WORK( ITAUP ), WORK( IWORK ),
1775     $                            LWORK-IWORK+1, IERR )
1776                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1777     $                            LDVT )
1778*
1779*                    Generate left bidiagonalizing vectors in WORK(IU)
1780*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1781*
1782                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1783     $                            WORK( ITAUQ ), WORK( IWORK ),
1784     $                            LWORK-IWORK+1, IERR )
1785*
1786*                    Generate right bidiagonalizing vectors in VT
1787*                    (Workspace: need N*N+4*N-1,
1788*                                prefer N*N+3*N+(N-1)*NB)
1789*
1790                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1791     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1792                     IWORK = IE + N
1793*
1794*                    Perform bidiagonal QR iteration, computing left
1795*                    singular vectors of R in WORK(IU) and computing
1796*                    right singular vectors of R in VT
1797*                    (Workspace: need N*N+BDSPAC)
1798*
1799                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1800     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1801     $                            WORK( IWORK ), INFO )
1802*
1803*                    Multiply Q in U by left singular vectors of R in
1804*                    WORK(IU), storing result in A
1805*                    (Workspace: need N*N)
1806*
1807                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1808     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1809*
1810*                    Copy left singular vectors of A from A to U
1811*
1812                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1813*
1814                  ELSE
1815*
1816*                    Insufficient workspace for a fast algorithm
1817*
1818                     ITAU = 1
1819                     IWORK = ITAU + N
1820*
1821*                    Compute A=Q*R, copying result to U
1822*                    (Workspace: need 2*N, prefer N+N*NB)
1823*
1824                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1825     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1826                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1827*
1828*                    Generate Q in U
1829*                    (Workspace: need N+M, prefer N+M*NB)
1830*
1831                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1832     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1833*
1834*                    Copy R from A to VT, zeroing out below it
1835*
1836                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1837                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
1838     $                            LDVT )
1839                     IE = ITAU
1840                     ITAUQ = IE + N
1841                     ITAUP = ITAUQ + N
1842                     IWORK = ITAUP + N
1843*
1844*                    Bidiagonalize R in VT
1845*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1846*
1847                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1848     $                            WORK( ITAUQ ), WORK( ITAUP ),
1849     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1850*
1851*                    Multiply Q in U by left bidiagonalizing vectors
1852*                    in VT
1853*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1854*
1855                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1856     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1857     $                            LWORK-IWORK+1, IERR )
1858*
1859*                    Generate right bidiagonalizing vectors in VT
1860*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1861*
1862                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1863     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1864                     IWORK = IE + N
1865*
1866*                    Perform bidiagonal QR iteration, computing left
1867*                    singular vectors of A in U and computing right
1868*                    singular vectors of A in VT
1869*                    (Workspace: need BDSPAC)
1870*
1871                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1872     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1873     $                            INFO )
1874*
1875                  END IF
1876*
1877               END IF
1878*
1879            END IF
1880*
1881         ELSE
1882*
1883*           M .LT. MNTHR
1884*
1885*           Path 10 (M at least N, but not much larger)
1886*           Reduce to bidiagonal form without QR decomposition
1887*
1888            IE = 1
1889            ITAUQ = IE + N
1890            ITAUP = ITAUQ + N
1891            IWORK = ITAUP + N
1892*
1893*           Bidiagonalize A
1894*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1895*
1896            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1897     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1898     $                   IERR )
1899            IF( WNTUAS ) THEN
1900*
1901*              If left singular vectors desired in U, copy result to U
1902*              and generate left bidiagonalizing vectors in U
1903*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1904*
1905               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1906               IF( WNTUS )
1907     $            NCU = N
1908               IF( WNTUA )
1909     $            NCU = M
1910               CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1911     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1912            END IF
1913            IF( WNTVAS ) THEN
1914*
1915*              If right singular vectors desired in VT, copy result to
1916*              VT and generate right bidiagonalizing vectors in VT
1917*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1918*
1919               CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1920               CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1921     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1922            END IF
1923            IF( WNTUO ) THEN
1924*
1925*              If left singular vectors desired in A, generate left
1926*              bidiagonalizing vectors in A
1927*              (Workspace: need 4*N, prefer 3*N+N*NB)
1928*
1929               CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1930     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1931            END IF
1932            IF( WNTVO ) THEN
1933*
1934*              If right singular vectors desired in A, generate right
1935*              bidiagonalizing vectors in A
1936*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1937*
1938               CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1939     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1940            END IF
1941            IWORK = IE + N
1942            IF( WNTUAS .OR. WNTUO )
1943     $         NRU = M
1944            IF( WNTUN )
1945     $         NRU = 0
1946            IF( WNTVAS .OR. WNTVO )
1947     $         NCVT = N
1948            IF( WNTVN )
1949     $         NCVT = 0
1950            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1951*
1952*              Perform bidiagonal QR iteration, if desired, computing
1953*              left singular vectors in U and computing right singular
1954*              vectors in VT
1955*              (Workspace: need BDSPAC)
1956*
1957               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1958     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1959            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1960*
1961*              Perform bidiagonal QR iteration, if desired, computing
1962*              left singular vectors in U and computing right singular
1963*              vectors in A
1964*              (Workspace: need BDSPAC)
1965*
1966               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1967     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
1968            ELSE
1969*
1970*              Perform bidiagonal QR iteration, if desired, computing
1971*              left singular vectors in A and computing right singular
1972*              vectors in VT
1973*              (Workspace: need BDSPAC)
1974*
1975               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1976     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1977            END IF
1978*
1979         END IF
1980*
1981      ELSE
1982*
1983*        A has more columns than rows. If A has sufficiently more
1984*        columns than rows, first reduce using the LQ decomposition (if
1985*        sufficient workspace available)
1986*
1987         IF( N.GE.MNTHR ) THEN
1988*
1989            IF( WNTVN ) THEN
1990*
1991*              Path 1t(N much larger than M, JOBVT='N')
1992*              No right singular vectors to be computed
1993*
1994               ITAU = 1
1995               IWORK = ITAU + M
1996*
1997*              Compute A=L*Q
1998*              (Workspace: need 2*M, prefer M+M*NB)
1999*
2000               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2001     $                      LWORK-IWORK+1, IERR )
2002*
2003*              Zero out above L
2004*
2005               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2006               IE = 1
2007               ITAUQ = IE + M
2008               ITAUP = ITAUQ + M
2009               IWORK = ITAUP + M
2010*
2011*              Bidiagonalize L in A
2012*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
2013*
2014               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2015     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2016     $                      IERR )
2017               IF( WNTUO .OR. WNTUAS ) THEN
2018*
2019*                 If left singular vectors desired, generate Q
2020*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2021*
2022                  CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2023     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2024               END IF
2025               IWORK = IE + M
2026               NRU = 0
2027               IF( WNTUO .OR. WNTUAS )
2028     $            NRU = M
2029*
2030*              Perform bidiagonal QR iteration, computing left singular
2031*              vectors of A in A if desired
2032*              (Workspace: need BDSPAC)
2033*
2034               CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2035     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2036*
2037*              If left singular vectors desired in U, copy them there
2038*
2039               IF( WNTUAS )
2040     $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2041*
2042            ELSE IF( WNTVO .AND. WNTUN ) THEN
2043*
2044*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2045*              M right singular vectors to be overwritten on A and
2046*              no left singular vectors to be computed
2047*
2048               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2049*
2050*                 Sufficient workspace for a fast algorithm
2051*
2052                  IR = 1
2053                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2054*
2055*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2056*
2057                     LDWRKU = LDA
2058                     CHUNK = N
2059                     LDWRKR = LDA
2060                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2061*
2062*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2063*
2064                     LDWRKU = LDA
2065                     CHUNK = N
2066                     LDWRKR = M
2067                  ELSE
2068*
2069*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2070*
2071                     LDWRKU = M
2072                     CHUNK = ( LWORK-M*M-M ) / M
2073                     LDWRKR = M
2074                  END IF
2075                  ITAU = IR + LDWRKR*M
2076                  IWORK = ITAU + M
2077*
2078*                 Compute A=L*Q
2079*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2080*
2081                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2082     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2083*
2084*                 Copy L to WORK(IR) and zero out above it
2085*
2086                  CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2087                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2088     $                         WORK( IR+LDWRKR ), LDWRKR )
2089*
2090*                 Generate Q in A
2091*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2092*
2093                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2094     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2095                  IE = ITAU
2096                  ITAUQ = IE + M
2097                  ITAUP = ITAUQ + M
2098                  IWORK = ITAUP + M
2099*
2100*                 Bidiagonalize L in WORK(IR)
2101*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2102*
2103                  CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2104     $                         WORK( ITAUQ ), WORK( ITAUP ),
2105     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2106*
2107*                 Generate right vectors bidiagonalizing L
2108*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2109*
2110                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2111     $                         WORK( ITAUP ), WORK( IWORK ),
2112     $                         LWORK-IWORK+1, IERR )
2113                  IWORK = IE + M
2114*
2115*                 Perform bidiagonal QR iteration, computing right
2116*                 singular vectors of L in WORK(IR)
2117*                 (Workspace: need M*M+BDSPAC)
2118*
2119                  CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2120     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2121     $                         WORK( IWORK ), INFO )
2122                  IU = IE + M
2123*
2124*                 Multiply right singular vectors of L in WORK(IR) by Q
2125*                 in A, storing result in WORK(IU) and copying to A
2126*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2127*
2128                  DO 30 I = 1, N, CHUNK
2129                     BLK = MIN( N-I+1, CHUNK )
2130                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2131     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2132     $                           WORK( IU ), LDWRKU )
2133                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2134     $                            A( 1, I ), LDA )
2135   30             CONTINUE
2136*
2137               ELSE
2138*
2139*                 Insufficient workspace for a fast algorithm
2140*
2141                  IE = 1
2142                  ITAUQ = IE + M
2143                  ITAUP = ITAUQ + M
2144                  IWORK = ITAUP + M
2145*
2146*                 Bidiagonalize A
2147*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2148*
2149                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2150     $                         WORK( ITAUQ ), WORK( ITAUP ),
2151     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2152*
2153*                 Generate right vectors bidiagonalizing A
2154*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2155*
2156                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2157     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2158                  IWORK = IE + M
2159*
2160*                 Perform bidiagonal QR iteration, computing right
2161*                 singular vectors of A in A
2162*                 (Workspace: need BDSPAC)
2163*
2164                  CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2165     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2166*
2167               END IF
2168*
2169            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2170*
2171*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2172*              M right singular vectors to be overwritten on A and
2173*              M left singular vectors to be computed in U
2174*
2175               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2176*
2177*                 Sufficient workspace for a fast algorithm
2178*
2179                  IR = 1
2180                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2181*
2182*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2183*
2184                     LDWRKU = LDA
2185                     CHUNK = N
2186                     LDWRKR = LDA
2187                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2188*
2189*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2190*
2191                     LDWRKU = LDA
2192                     CHUNK = N
2193                     LDWRKR = M
2194                  ELSE
2195*
2196*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2197*
2198                     LDWRKU = M
2199                     CHUNK = ( LWORK-M*M-M ) / M
2200                     LDWRKR = M
2201                  END IF
2202                  ITAU = IR + LDWRKR*M
2203                  IWORK = ITAU + M
2204*
2205*                 Compute A=L*Q
2206*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2207*
2208                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2209     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2210*
2211*                 Copy L to U, zeroing about above it
2212*
2213                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2214                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2215     $                         LDU )
2216*
2217*                 Generate Q in A
2218*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2219*
2220                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2221     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2222                  IE = ITAU
2223                  ITAUQ = IE + M
2224                  ITAUP = ITAUQ + M
2225                  IWORK = ITAUP + M
2226*
2227*                 Bidiagonalize L in U, copying result to WORK(IR)
2228*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2229*
2230                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2231     $                         WORK( ITAUQ ), WORK( ITAUP ),
2232     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2233                  CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2234*
2235*                 Generate right vectors bidiagonalizing L in WORK(IR)
2236*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2237*
2238                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2239     $                         WORK( ITAUP ), WORK( IWORK ),
2240     $                         LWORK-IWORK+1, IERR )
2241*
2242*                 Generate left vectors bidiagonalizing L in U
2243*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2244*
2245                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2246     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2247                  IWORK = IE + M
2248*
2249*                 Perform bidiagonal QR iteration, computing left
2250*                 singular vectors of L in U, and computing right
2251*                 singular vectors of L in WORK(IR)
2252*                 (Workspace: need M*M+BDSPAC)
2253*
2254                  CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2255     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2256     $                         WORK( IWORK ), INFO )
2257                  IU = IE + M
2258*
2259*                 Multiply right singular vectors of L in WORK(IR) by Q
2260*                 in A, storing result in WORK(IU) and copying to A
2261*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2262*
2263                  DO 40 I = 1, N, CHUNK
2264                     BLK = MIN( N-I+1, CHUNK )
2265                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2266     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2267     $                           WORK( IU ), LDWRKU )
2268                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2269     $                            A( 1, I ), LDA )
2270   40             CONTINUE
2271*
2272               ELSE
2273*
2274*                 Insufficient workspace for a fast algorithm
2275*
2276                  ITAU = 1
2277                  IWORK = ITAU + M
2278*
2279*                 Compute A=L*Q
2280*                 (Workspace: need 2*M, prefer M+M*NB)
2281*
2282                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2283     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2284*
2285*                 Copy L to U, zeroing out above it
2286*
2287                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2288                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2289     $                         LDU )
2290*
2291*                 Generate Q in A
2292*                 (Workspace: need 2*M, prefer M+M*NB)
2293*
2294                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2295     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2296                  IE = ITAU
2297                  ITAUQ = IE + M
2298                  ITAUP = ITAUQ + M
2299                  IWORK = ITAUP + M
2300*
2301*                 Bidiagonalize L in U
2302*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2303*
2304                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2305     $                         WORK( ITAUQ ), WORK( ITAUP ),
2306     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2307*
2308*                 Multiply right vectors bidiagonalizing L by Q in A
2309*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2310*
2311                  CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2312     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2313     $                         LWORK-IWORK+1, IERR )
2314*
2315*                 Generate left vectors bidiagonalizing L in U
2316*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2317*
2318                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2319     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2320                  IWORK = IE + M
2321*
2322*                 Perform bidiagonal QR iteration, computing left
2323*                 singular vectors of A in U and computing right
2324*                 singular vectors of A in A
2325*                 (Workspace: need BDSPAC)
2326*
2327                  CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2328     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2329*
2330               END IF
2331*
2332            ELSE IF( WNTVS ) THEN
2333*
2334               IF( WNTUN ) THEN
2335*
2336*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2337*                 M right singular vectors to be computed in VT and
2338*                 no left singular vectors to be computed
2339*
2340                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2341*
2342*                    Sufficient workspace for a fast algorithm
2343*
2344                     IR = 1
2345                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2346*
2347*                       WORK(IR) is LDA by M
2348*
2349                        LDWRKR = LDA
2350                     ELSE
2351*
2352*                       WORK(IR) is M by M
2353*
2354                        LDWRKR = M
2355                     END IF
2356                     ITAU = IR + LDWRKR*M
2357                     IWORK = ITAU + M
2358*
2359*                    Compute A=L*Q
2360*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2361*
2362                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2363     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2364*
2365*                    Copy L to WORK(IR), zeroing out above it
2366*
2367                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2368     $                            LDWRKR )
2369                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2370     $                            WORK( IR+LDWRKR ), LDWRKR )
2371*
2372*                    Generate Q in A
2373*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2374*
2375                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2376     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2377                     IE = ITAU
2378                     ITAUQ = IE + M
2379                     ITAUP = ITAUQ + M
2380                     IWORK = ITAUP + M
2381*
2382*                    Bidiagonalize L in WORK(IR)
2383*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2384*
2385                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2386     $                            WORK( IE ), WORK( ITAUQ ),
2387     $                            WORK( ITAUP ), WORK( IWORK ),
2388     $                            LWORK-IWORK+1, IERR )
2389*
2390*                    Generate right vectors bidiagonalizing L in
2391*                    WORK(IR)
2392*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2393*
2394                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2395     $                            WORK( ITAUP ), WORK( IWORK ),
2396     $                            LWORK-IWORK+1, IERR )
2397                     IWORK = IE + M
2398*
2399*                    Perform bidiagonal QR iteration, computing right
2400*                    singular vectors of L in WORK(IR)
2401*                    (Workspace: need M*M+BDSPAC)
2402*
2403                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2404     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2405     $                            WORK( IWORK ), INFO )
2406*
2407*                    Multiply right singular vectors of L in WORK(IR) by
2408*                    Q in A, storing result in VT
2409*                    (Workspace: need M*M)
2410*
2411                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2412     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2413*
2414                  ELSE
2415*
2416*                    Insufficient workspace for a fast algorithm
2417*
2418                     ITAU = 1
2419                     IWORK = ITAU + M
2420*
2421*                    Compute A=L*Q
2422*                    (Workspace: need 2*M, prefer M+M*NB)
2423*
2424                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2425     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2426*
2427*                    Copy result to VT
2428*
2429                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2430*
2431*                    Generate Q in VT
2432*                    (Workspace: need 2*M, prefer M+M*NB)
2433*
2434                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2435     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2436                     IE = ITAU
2437                     ITAUQ = IE + M
2438                     ITAUP = ITAUQ + M
2439                     IWORK = ITAUP + M
2440*
2441*                    Zero out above L in A
2442*
2443                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2444     $                            LDA )
2445*
2446*                    Bidiagonalize L in A
2447*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2448*
2449                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2450     $                            WORK( ITAUQ ), WORK( ITAUP ),
2451     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2452*
2453*                    Multiply right vectors bidiagonalizing L by Q in VT
2454*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2455*
2456                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2457     $                            WORK( ITAUP ), VT, LDVT,
2458     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2459                     IWORK = IE + M
2460*
2461*                    Perform bidiagonal QR iteration, computing right
2462*                    singular vectors of A in VT
2463*                    (Workspace: need BDSPAC)
2464*
2465                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2466     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2467     $                            INFO )
2468*
2469                  END IF
2470*
2471               ELSE IF( WNTUO ) THEN
2472*
2473*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2474*                 M right singular vectors to be computed in VT and
2475*                 M left singular vectors to be overwritten on A
2476*
2477                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2478*
2479*                    Sufficient workspace for a fast algorithm
2480*
2481                     IU = 1
2482                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2483*
2484*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2485*
2486                        LDWRKU = LDA
2487                        IR = IU + LDWRKU*M
2488                        LDWRKR = LDA
2489                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2490*
2491*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2492*
2493                        LDWRKU = LDA
2494                        IR = IU + LDWRKU*M
2495                        LDWRKR = M
2496                     ELSE
2497*
2498*                       WORK(IU) is M by M and WORK(IR) is M by M
2499*
2500                        LDWRKU = M
2501                        IR = IU + LDWRKU*M
2502                        LDWRKR = M
2503                     END IF
2504                     ITAU = IR + LDWRKR*M
2505                     IWORK = ITAU + M
2506*
2507*                    Compute A=L*Q
2508*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2509*
2510                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2511     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2512*
2513*                    Copy L to WORK(IU), zeroing out below it
2514*
2515                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2516     $                            LDWRKU )
2517                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2518     $                            WORK( IU+LDWRKU ), LDWRKU )
2519*
2520*                    Generate Q in A
2521*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2522*
2523                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2524     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2525                     IE = ITAU
2526                     ITAUQ = IE + M
2527                     ITAUP = ITAUQ + M
2528                     IWORK = ITAUP + M
2529*
2530*                    Bidiagonalize L in WORK(IU), copying result to
2531*                    WORK(IR)
2532*                    (Workspace: need 2*M*M+4*M,
2533*                                prefer 2*M*M+3*M+2*M*NB)
2534*
2535                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2536     $                            WORK( IE ), WORK( ITAUQ ),
2537     $                            WORK( ITAUP ), WORK( IWORK ),
2538     $                            LWORK-IWORK+1, IERR )
2539                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2540     $                            WORK( IR ), LDWRKR )
2541*
2542*                    Generate right bidiagonalizing vectors in WORK(IU)
2543*                    (Workspace: need 2*M*M+4*M-1,
2544*                                prefer 2*M*M+3*M+(M-1)*NB)
2545*
2546                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2547     $                            WORK( ITAUP ), WORK( IWORK ),
2548     $                            LWORK-IWORK+1, IERR )
2549*
2550*                    Generate left bidiagonalizing vectors in WORK(IR)
2551*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2552*
2553                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2554     $                            WORK( ITAUQ ), WORK( IWORK ),
2555     $                            LWORK-IWORK+1, IERR )
2556                     IWORK = IE + M
2557*
2558*                    Perform bidiagonal QR iteration, computing left
2559*                    singular vectors of L in WORK(IR) and computing
2560*                    right singular vectors of L in WORK(IU)
2561*                    (Workspace: need 2*M*M+BDSPAC)
2562*
2563                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2564     $                            WORK( IU ), LDWRKU, WORK( IR ),
2565     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2566*
2567*                    Multiply right singular vectors of L in WORK(IU) by
2568*                    Q in A, storing result in VT
2569*                    (Workspace: need M*M)
2570*
2571                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2572     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2573*
2574*                    Copy left singular vectors of L to A
2575*                    (Workspace: need M*M)
2576*
2577                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2578     $                            LDA )
2579*
2580                  ELSE
2581*
2582*                    Insufficient workspace for a fast algorithm
2583*
2584                     ITAU = 1
2585                     IWORK = ITAU + M
2586*
2587*                    Compute A=L*Q, copying result to VT
2588*                    (Workspace: need 2*M, prefer M+M*NB)
2589*
2590                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2591     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2592                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2593*
2594*                    Generate Q in VT
2595*                    (Workspace: need 2*M, prefer M+M*NB)
2596*
2597                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2598     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2599                     IE = ITAU
2600                     ITAUQ = IE + M
2601                     ITAUP = ITAUQ + M
2602                     IWORK = ITAUP + M
2603*
2604*                    Zero out above L in A
2605*
2606                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2607     $                            LDA )
2608*
2609*                    Bidiagonalize L in A
2610*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2611*
2612                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2613     $                            WORK( ITAUQ ), WORK( ITAUP ),
2614     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2615*
2616*                    Multiply right vectors bidiagonalizing L by Q in VT
2617*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2618*
2619                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2620     $                            WORK( ITAUP ), VT, LDVT,
2621     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2622*
2623*                    Generate left bidiagonalizing vectors of L in A
2624*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2625*
2626                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2627     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2628                     IWORK = IE + M
2629*
2630*                    Perform bidiagonal QR iteration, compute left
2631*                    singular vectors of A in A and compute right
2632*                    singular vectors of A in VT
2633*                    (Workspace: need BDSPAC)
2634*
2635                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2636     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2637     $                            INFO )
2638*
2639                  END IF
2640*
2641               ELSE IF( WNTUAS ) THEN
2642*
2643*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2644*                         JOBVT='S')
2645*                 M right singular vectors to be computed in VT and
2646*                 M left singular vectors to be computed in U
2647*
2648                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2649*
2650*                    Sufficient workspace for a fast algorithm
2651*
2652                     IU = 1
2653                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2654*
2655*                       WORK(IU) is LDA by N
2656*
2657                        LDWRKU = LDA
2658                     ELSE
2659*
2660*                       WORK(IU) is LDA by M
2661*
2662                        LDWRKU = M
2663                     END IF
2664                     ITAU = IU + LDWRKU*M
2665                     IWORK = ITAU + M
2666*
2667*                    Compute A=L*Q
2668*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2669*
2670                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2671     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2672*
2673*                    Copy L to WORK(IU), zeroing out above it
2674*
2675                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2676     $                            LDWRKU )
2677                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2678     $                            WORK( IU+LDWRKU ), LDWRKU )
2679*
2680*                    Generate Q in A
2681*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2682*
2683                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2684     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2685                     IE = ITAU
2686                     ITAUQ = IE + M
2687                     ITAUP = ITAUQ + M
2688                     IWORK = ITAUP + M
2689*
2690*                    Bidiagonalize L in WORK(IU), copying result to U
2691*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2692*
2693                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2694     $                            WORK( IE ), WORK( ITAUQ ),
2695     $                            WORK( ITAUP ), WORK( IWORK ),
2696     $                            LWORK-IWORK+1, IERR )
2697                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2698     $                            LDU )
2699*
2700*                    Generate right bidiagonalizing vectors in WORK(IU)
2701*                    (Workspace: need M*M+4*M-1,
2702*                                prefer M*M+3*M+(M-1)*NB)
2703*
2704                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2705     $                            WORK( ITAUP ), WORK( IWORK ),
2706     $                            LWORK-IWORK+1, IERR )
2707*
2708*                    Generate left bidiagonalizing vectors in U
2709*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2710*
2711                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2712     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2713                     IWORK = IE + M
2714*
2715*                    Perform bidiagonal QR iteration, computing left
2716*                    singular vectors of L in U and computing right
2717*                    singular vectors of L in WORK(IU)
2718*                    (Workspace: need M*M+BDSPAC)
2719*
2720                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2721     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2722     $                            WORK( IWORK ), INFO )
2723*
2724*                    Multiply right singular vectors of L in WORK(IU) by
2725*                    Q in A, storing result in VT
2726*                    (Workspace: need M*M)
2727*
2728                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2729     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2730*
2731                  ELSE
2732*
2733*                    Insufficient workspace for a fast algorithm
2734*
2735                     ITAU = 1
2736                     IWORK = ITAU + M
2737*
2738*                    Compute A=L*Q, copying result to VT
2739*                    (Workspace: need 2*M, prefer M+M*NB)
2740*
2741                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2742     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2743                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2744*
2745*                    Generate Q in VT
2746*                    (Workspace: need 2*M, prefer M+M*NB)
2747*
2748                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2749     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2750*
2751*                    Copy L to U, zeroing out above it
2752*
2753                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2754                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2755     $                            LDU )
2756                     IE = ITAU
2757                     ITAUQ = IE + M
2758                     ITAUP = ITAUQ + M
2759                     IWORK = ITAUP + M
2760*
2761*                    Bidiagonalize L in U
2762*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2763*
2764                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2765     $                            WORK( ITAUQ ), WORK( ITAUP ),
2766     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2767*
2768*                    Multiply right bidiagonalizing vectors in U by Q
2769*                    in VT
2770*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2771*
2772                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2773     $                            WORK( ITAUP ), VT, LDVT,
2774     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2775*
2776*                    Generate left bidiagonalizing vectors in U
2777*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2778*
2779                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2780     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2781                     IWORK = IE + M
2782*
2783*                    Perform bidiagonal QR iteration, computing left
2784*                    singular vectors of A in U and computing right
2785*                    singular vectors of A in VT
2786*                    (Workspace: need BDSPAC)
2787*
2788                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2789     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2790     $                            INFO )
2791*
2792                  END IF
2793*
2794               END IF
2795*
2796            ELSE IF( WNTVA ) THEN
2797*
2798               IF( WNTUN ) THEN
2799*
2800*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2801*                 N right singular vectors to be computed in VT and
2802*                 no left singular vectors to be computed
2803*
2804                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2805*
2806*                    Sufficient workspace for a fast algorithm
2807*
2808                     IR = 1
2809                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2810*
2811*                       WORK(IR) is LDA by M
2812*
2813                        LDWRKR = LDA
2814                     ELSE
2815*
2816*                       WORK(IR) is M by M
2817*
2818                        LDWRKR = M
2819                     END IF
2820                     ITAU = IR + LDWRKR*M
2821                     IWORK = ITAU + M
2822*
2823*                    Compute A=L*Q, copying result to VT
2824*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2825*
2826                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2827     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2828                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2829*
2830*                    Copy L to WORK(IR), zeroing out above it
2831*
2832                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2833     $                            LDWRKR )
2834                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2835     $                            WORK( IR+LDWRKR ), LDWRKR )
2836*
2837*                    Generate Q in VT
2838*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2839*
2840                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2841     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2842                     IE = ITAU
2843                     ITAUQ = IE + M
2844                     ITAUP = ITAUQ + M
2845                     IWORK = ITAUP + M
2846*
2847*                    Bidiagonalize L in WORK(IR)
2848*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2849*
2850                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2851     $                            WORK( IE ), WORK( ITAUQ ),
2852     $                            WORK( ITAUP ), WORK( IWORK ),
2853     $                            LWORK-IWORK+1, IERR )
2854*
2855*                    Generate right bidiagonalizing vectors in WORK(IR)
2856*                    (Workspace: need M*M+4*M-1,
2857*                                prefer M*M+3*M+(M-1)*NB)
2858*
2859                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2860     $                            WORK( ITAUP ), WORK( IWORK ),
2861     $                            LWORK-IWORK+1, IERR )
2862                     IWORK = IE + M
2863*
2864*                    Perform bidiagonal QR iteration, computing right
2865*                    singular vectors of L in WORK(IR)
2866*                    (Workspace: need M*M+BDSPAC)
2867*
2868                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2869     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2870     $                            WORK( IWORK ), INFO )
2871*
2872*                    Multiply right singular vectors of L in WORK(IR) by
2873*                    Q in VT, storing result in A
2874*                    (Workspace: need M*M)
2875*
2876                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2877     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2878*
2879*                    Copy right singular vectors of A from A to VT
2880*
2881                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2882*
2883                  ELSE
2884*
2885*                    Insufficient workspace for a fast algorithm
2886*
2887                     ITAU = 1
2888                     IWORK = ITAU + M
2889*
2890*                    Compute A=L*Q, copying result to VT
2891*                    (Workspace: need 2*M, prefer M+M*NB)
2892*
2893                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2894     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2895                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2896*
2897*                    Generate Q in VT
2898*                    (Workspace: need M+N, prefer M+N*NB)
2899*
2900                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2901     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2902                     IE = ITAU
2903                     ITAUQ = IE + M
2904                     ITAUP = ITAUQ + M
2905                     IWORK = ITAUP + M
2906*
2907*                    Zero out above L in A
2908*
2909                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2910     $                            LDA )
2911*
2912*                    Bidiagonalize L in A
2913*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2914*
2915                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2916     $                            WORK( ITAUQ ), WORK( ITAUP ),
2917     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2918*
2919*                    Multiply right bidiagonalizing vectors in A by Q
2920*                    in VT
2921*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2922*
2923                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2924     $                            WORK( ITAUP ), VT, LDVT,
2925     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2926                     IWORK = IE + M
2927*
2928*                    Perform bidiagonal QR iteration, computing right
2929*                    singular vectors of A in VT
2930*                    (Workspace: need BDSPAC)
2931*
2932                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2933     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2934     $                            INFO )
2935*
2936                  END IF
2937*
2938               ELSE IF( WNTUO ) THEN
2939*
2940*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2941*                 N right singular vectors to be computed in VT and
2942*                 M left singular vectors to be overwritten on A
2943*
2944                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2945*
2946*                    Sufficient workspace for a fast algorithm
2947*
2948                     IU = 1
2949                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2950*
2951*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2952*
2953                        LDWRKU = LDA
2954                        IR = IU + LDWRKU*M
2955                        LDWRKR = LDA
2956                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2957*
2958*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2959*
2960                        LDWRKU = LDA
2961                        IR = IU + LDWRKU*M
2962                        LDWRKR = M
2963                     ELSE
2964*
2965*                       WORK(IU) is M by M and WORK(IR) is M by M
2966*
2967                        LDWRKU = M
2968                        IR = IU + LDWRKU*M
2969                        LDWRKR = M
2970                     END IF
2971                     ITAU = IR + LDWRKR*M
2972                     IWORK = ITAU + M
2973*
2974*                    Compute A=L*Q, copying result to VT
2975*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2976*
2977                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2978     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2979                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2980*
2981*                    Generate Q in VT
2982*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2983*
2984                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2985     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2986*
2987*                    Copy L to WORK(IU), zeroing out above it
2988*
2989                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2990     $                            LDWRKU )
2991                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2992     $                            WORK( IU+LDWRKU ), LDWRKU )
2993                     IE = ITAU
2994                     ITAUQ = IE + M
2995                     ITAUP = ITAUQ + M
2996                     IWORK = ITAUP + M
2997*
2998*                    Bidiagonalize L in WORK(IU), copying result to
2999*                    WORK(IR)
3000*                    (Workspace: need 2*M*M+4*M,
3001*                                prefer 2*M*M+3*M+2*M*NB)
3002*
3003                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3004     $                            WORK( IE ), WORK( ITAUQ ),
3005     $                            WORK( ITAUP ), WORK( IWORK ),
3006     $                            LWORK-IWORK+1, IERR )
3007                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3008     $                            WORK( IR ), LDWRKR )
3009*
3010*                    Generate right bidiagonalizing vectors in WORK(IU)
3011*                    (Workspace: need 2*M*M+4*M-1,
3012*                                prefer 2*M*M+3*M+(M-1)*NB)
3013*
3014                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3015     $                            WORK( ITAUP ), WORK( IWORK ),
3016     $                            LWORK-IWORK+1, IERR )
3017*
3018*                    Generate left bidiagonalizing vectors in WORK(IR)
3019*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3020*
3021                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3022     $                            WORK( ITAUQ ), WORK( IWORK ),
3023     $                            LWORK-IWORK+1, IERR )
3024                     IWORK = IE + M
3025*
3026*                    Perform bidiagonal QR iteration, computing left
3027*                    singular vectors of L in WORK(IR) and computing
3028*                    right singular vectors of L in WORK(IU)
3029*                    (Workspace: need 2*M*M+BDSPAC)
3030*
3031                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3032     $                            WORK( IU ), LDWRKU, WORK( IR ),
3033     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3034*
3035*                    Multiply right singular vectors of L in WORK(IU) by
3036*                    Q in VT, storing result in A
3037*                    (Workspace: need M*M)
3038*
3039                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3040     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3041*
3042*                    Copy right singular vectors of A from A to VT
3043*
3044                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3045*
3046*                    Copy left singular vectors of A from WORK(IR) to A
3047*
3048                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3049     $                            LDA )
3050*
3051                  ELSE
3052*
3053*                    Insufficient workspace for a fast algorithm
3054*
3055                     ITAU = 1
3056                     IWORK = ITAU + M
3057*
3058*                    Compute A=L*Q, copying result to VT
3059*                    (Workspace: need 2*M, prefer M+M*NB)
3060*
3061                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3062     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3063                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3064*
3065*                    Generate Q in VT
3066*                    (Workspace: need M+N, prefer M+N*NB)
3067*
3068                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3069     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3070                     IE = ITAU
3071                     ITAUQ = IE + M
3072                     ITAUP = ITAUQ + M
3073                     IWORK = ITAUP + M
3074*
3075*                    Zero out above L in A
3076*
3077                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3078     $                            LDA )
3079*
3080*                    Bidiagonalize L in A
3081*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3082*
3083                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3084     $                            WORK( ITAUQ ), WORK( ITAUP ),
3085     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3086*
3087*                    Multiply right bidiagonalizing vectors in A by Q
3088*                    in VT
3089*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3090*
3091                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3092     $                            WORK( ITAUP ), VT, LDVT,
3093     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3094*
3095*                    Generate left bidiagonalizing vectors in A
3096*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3097*
3098                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3099     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3100                     IWORK = IE + M
3101*
3102*                    Perform bidiagonal QR iteration, computing left
3103*                    singular vectors of A in A and computing right
3104*                    singular vectors of A in VT
3105*                    (Workspace: need BDSPAC)
3106*
3107                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3108     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3109     $                            INFO )
3110*
3111                  END IF
3112*
3113               ELSE IF( WNTUAS ) THEN
3114*
3115*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3116*                         JOBVT='A')
3117*                 N right singular vectors to be computed in VT and
3118*                 M left singular vectors to be computed in U
3119*
3120                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3121*
3122*                    Sufficient workspace for a fast algorithm
3123*
3124                     IU = 1
3125                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3126*
3127*                       WORK(IU) is LDA by M
3128*
3129                        LDWRKU = LDA
3130                     ELSE
3131*
3132*                       WORK(IU) is M by M
3133*
3134                        LDWRKU = M
3135                     END IF
3136                     ITAU = IU + LDWRKU*M
3137                     IWORK = ITAU + M
3138*
3139*                    Compute A=L*Q, copying result to VT
3140*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3141*
3142                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3143     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3144                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3145*
3146*                    Generate Q in VT
3147*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3148*
3149                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3150     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3151*
3152*                    Copy L to WORK(IU), zeroing out above it
3153*
3154                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3155     $                            LDWRKU )
3156                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3157     $                            WORK( IU+LDWRKU ), LDWRKU )
3158                     IE = ITAU
3159                     ITAUQ = IE + M
3160                     ITAUP = ITAUQ + M
3161                     IWORK = ITAUP + M
3162*
3163*                    Bidiagonalize L in WORK(IU), copying result to U
3164*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3165*
3166                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3167     $                            WORK( IE ), WORK( ITAUQ ),
3168     $                            WORK( ITAUP ), WORK( IWORK ),
3169     $                            LWORK-IWORK+1, IERR )
3170                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3171     $                            LDU )
3172*
3173*                    Generate right bidiagonalizing vectors in WORK(IU)
3174*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3175*
3176                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3177     $                            WORK( ITAUP ), WORK( IWORK ),
3178     $                            LWORK-IWORK+1, IERR )
3179*
3180*                    Generate left bidiagonalizing vectors in U
3181*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3182*
3183                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3184     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3185                     IWORK = IE + M
3186*
3187*                    Perform bidiagonal QR iteration, computing left
3188*                    singular vectors of L in U and computing right
3189*                    singular vectors of L in WORK(IU)
3190*                    (Workspace: need M*M+BDSPAC)
3191*
3192                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3193     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3194     $                            WORK( IWORK ), INFO )
3195*
3196*                    Multiply right singular vectors of L in WORK(IU) by
3197*                    Q in VT, storing result in A
3198*                    (Workspace: need M*M)
3199*
3200                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3201     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3202*
3203*                    Copy right singular vectors of A from A to VT
3204*
3205                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3206*
3207                  ELSE
3208*
3209*                    Insufficient workspace for a fast algorithm
3210*
3211                     ITAU = 1
3212                     IWORK = ITAU + M
3213*
3214*                    Compute A=L*Q, copying result to VT
3215*                    (Workspace: need 2*M, prefer M+M*NB)
3216*
3217                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3218     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3219                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3220*
3221*                    Generate Q in VT
3222*                    (Workspace: need M+N, prefer M+N*NB)
3223*
3224                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3225     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3226*
3227*                    Copy L to U, zeroing out above it
3228*
3229                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3230                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3231     $                            LDU )
3232                     IE = ITAU
3233                     ITAUQ = IE + M
3234                     ITAUP = ITAUQ + M
3235                     IWORK = ITAUP + M
3236*
3237*                    Bidiagonalize L in U
3238*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3239*
3240                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3241     $                            WORK( ITAUQ ), WORK( ITAUP ),
3242     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3243*
3244*                    Multiply right bidiagonalizing vectors in U by Q
3245*                    in VT
3246*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3247*
3248                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3249     $                            WORK( ITAUP ), VT, LDVT,
3250     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3251*
3252*                    Generate left bidiagonalizing vectors in U
3253*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3254*
3255                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3256     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3257                     IWORK = IE + M
3258*
3259*                    Perform bidiagonal QR iteration, computing left
3260*                    singular vectors of A in U and computing right
3261*                    singular vectors of A in VT
3262*                    (Workspace: need BDSPAC)
3263*
3264                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3265     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3266     $                            INFO )
3267*
3268                  END IF
3269*
3270               END IF
3271*
3272            END IF
3273*
3274         ELSE
3275*
3276*           N .LT. MNTHR
3277*
3278*           Path 10t(N greater than M, but not much larger)
3279*           Reduce to bidiagonal form without LQ decomposition
3280*
3281            IE = 1
3282            ITAUQ = IE + M
3283            ITAUP = ITAUQ + M
3284            IWORK = ITAUP + M
3285*
3286*           Bidiagonalize A
3287*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3288*
3289            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3290     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3291     $                   IERR )
3292            IF( WNTUAS ) THEN
3293*
3294*              If left singular vectors desired in U, copy result to U
3295*              and generate left bidiagonalizing vectors in U
3296*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3297*
3298               CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3299               CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3300     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3301            END IF
3302            IF( WNTVAS ) THEN
3303*
3304*              If right singular vectors desired in VT, copy result to
3305*              VT and generate right bidiagonalizing vectors in VT
3306*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3307*
3308               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3309               IF( WNTVA )
3310     $            NRVT = N
3311               IF( WNTVS )
3312     $            NRVT = M
3313               CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3314     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3315            END IF
3316            IF( WNTUO ) THEN
3317*
3318*              If left singular vectors desired in A, generate left
3319*              bidiagonalizing vectors in A
3320*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3321*
3322               CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3323     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3324            END IF
3325            IF( WNTVO ) THEN
3326*
3327*              If right singular vectors desired in A, generate right
3328*              bidiagonalizing vectors in A
3329*              (Workspace: need 4*M, prefer 3*M+M*NB)
3330*
3331               CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3332     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3333            END IF
3334            IWORK = IE + M
3335            IF( WNTUAS .OR. WNTUO )
3336     $         NRU = M
3337            IF( WNTUN )
3338     $         NRU = 0
3339            IF( WNTVAS .OR. WNTVO )
3340     $         NCVT = N
3341            IF( WNTVN )
3342     $         NCVT = 0
3343            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3344*
3345*              Perform bidiagonal QR iteration, if desired, computing
3346*              left singular vectors in U and computing right singular
3347*              vectors in VT
3348*              (Workspace: need BDSPAC)
3349*
3350               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3351     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3352            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3353*
3354*              Perform bidiagonal QR iteration, if desired, computing
3355*              left singular vectors in U and computing right singular
3356*              vectors in A
3357*              (Workspace: need BDSPAC)
3358*
3359               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3360     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3361            ELSE
3362*
3363*              Perform bidiagonal QR iteration, if desired, computing
3364*              left singular vectors in A and computing right singular
3365*              vectors in VT
3366*              (Workspace: need BDSPAC)
3367*
3368               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3369     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3370            END IF
3371*
3372         END IF
3373*
3374      END IF
3375*
3376*     If DBDSQR failed to converge, copy unconverged superdiagonals
3377*     to WORK( 2:MINMN )
3378*
3379      IF( INFO.NE.0 ) THEN
3380         IF( IE.GT.2 ) THEN
3381            DO 50 I = 1, MINMN - 1
3382               WORK( I+1 ) = WORK( I+IE-1 )
3383   50       CONTINUE
3384         END IF
3385         IF( IE.LT.2 ) THEN
3386            DO 60 I = MINMN - 1, 1, -1
3387               WORK( I+1 ) = WORK( I+IE-1 )
3388   60       CONTINUE
3389         END IF
3390      END IF
3391*
3392*     Undo scaling if necessary
3393*
3394      IF( ISCL.EQ.1 ) THEN
3395         IF( ANRM.GT.BIGNUM )
3396     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3397     $                   IERR )
3398         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3399     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3400     $                   MINMN, IERR )
3401         IF( ANRM.LT.SMLNUM )
3402     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3403     $                   IERR )
3404         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3405     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3406     $                   MINMN, IERR )
3407      END IF
3408*
3409*     Return optimal workspace in WORK(1)
3410*
3411      WORK( 1 ) = MAXWRK
3412*
3413      RETURN
3414*
3415*     End of DGESVD
3416*
3417      END
3418